User Inputs

# Pull the user-supplied `params` entries into top-level variables.
output.var <- params$output.var

transform.abs <- FALSE  # hard-coded: absolute-value transform disabled
log.pred <- params$log.pred
norm.pred <- params$norm.pred
eda <- params$eda
algo.forward.caret <- params$algo.forward.caret
algo.backward.caret <- params$algo.backward.caret
algo.stepwise.caret <- params$algo.stepwise.caret
algo.LASSO.caret <- params$algo.LASSO.caret
algo.LARS.caret <- params$algo.LARS.caret
message("Parameters used for training/prediction: ")
## Parameters used for training/prediction:
str(params)
## List of 9
##  $ output.var         : chr "y3"
##  $ log.pred           : logi TRUE
##  $ norm.pred          : logi FALSE
##  $ eda                : logi FALSE
##  $ algo.forward.caret : logi TRUE
##  $ algo.backward.caret: logi TRUE
##  $ algo.stepwise.caret: logi TRUE
##  $ algo.LASSO.caret   : logi TRUE
##  $ algo.LARS.caret    : logi TRUE
# Setup Labels
# Setup labels: name of the (possibly transformed) output variable.
# Assign the value of the `if` expression directly. The original form
# `output.var.tr = if (...) ... else output.var.tr = output.var` nested a
# second assignment inside the unbraced `else` branch — redundant and a
# well-known parse/precedence trap.
#output.var.tr <- if (log.pred == TRUE) paste0(output.var, '.log') else output.var
output.var.tr <- if (log.pred == TRUE) paste0(output.var, '.cuberoot') else output.var
# output.var.tr <- if (norm.pred == TRUE) paste0(output.var, '.bestnorm') else output.var

Loading Data

# Load the feature matrix and the labels, then join them on the JobName key.
feat  = read.csv('../../Data/features_highprec.csv')
labels = read.csv('../../Data/labels.csv')
# Predictor names = every feature column except the JobName key.
predictors = names(dplyr::select(feat,-JobName))
# Keep only jobs present in both files.
data.ori = inner_join(feat,labels,by='JobName')
#data.ori = inner_join(feat,select_at(labels,c('JobName',output.var)),by='JobName')

Data validation

# Split complete vs. incomplete rows; keep only the modeling columns.
is.complete <- complete.cases(data.ori)
data.notComplete <- data.ori[!is.complete, ]
data <- data.ori[is.complete, ] %>% select_at(c(predictors, output.var, 'JobName'))
message('Original cases: ', nrow(data.ori))
## Original cases: 10000
message('Non-Complete cases: ', nrow(data.notComplete))
## Non-Complete cases: 3020
message('Complete cases: ', nrow(data))
## Complete cases: 6980
summary(dplyr::select_at(data, c('JobName', output.var)))
##       JobName           y3        
##  Job_00001:   1   Min.   : 95.91  
##  Job_00002:   1   1st Qu.:118.29  
##  Job_00003:   1   Median :124.03  
##  Job_00004:   1   Mean   :125.40  
##  Job_00007:   1   3rd Qu.:131.06  
##  Job_00008:   1   Max.   :193.73  
##  (Other)  :6974

Output Variable

The output variable y3 shows right skewness, so we will proceed with a cube-root transformation

Histogram

# Histogram + kernel density of the (untransformed) output variable.
df=gather(select_at(data,output.var))
ggplot(df, aes(x=value)) + 
  geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
  geom_density() 

  #stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  

QQPlot

# Normal QQ plot of the output variable (one facet per variable).
ggplot(gather(select_at(data,output.var)), aes(sample=value)) + 
  stat_qq() + 
  facet_wrap(~key, scales = 'free',ncol=4)

Transformation of Output Variable from y3 to y3.cuberoot

# Apply the cube-root transform when log.pred is set; otherwise copy as-is.
# (An earlier iteration used log10 instead of the cube root.)
data[[output.var.tr]] <- if (log.pred == TRUE) {
  data[[output.var]]^(1/3)
} else {
  data[[output.var]]
}
# Compare the pre- and post-transformation distributions side by side.
df <- gather(select_at(data, c(output.var, output.var.tr)))
ggplot(df, aes(value)) +
  geom_histogram(aes(y = ..density..), bins = 50, fill = 'light blue') +
  geom_density() +
  facet_wrap(~key, scales = 'free', ncol = 2)

# QQ plots, pre- and post-transformation.
ggplot(gather(select_at(data, c(output.var, output.var.tr))), aes(sample = value)) +
  stat_qq() +
  facet_wrap(~key, scales = 'free', ncol = 4)

Best Normalizator y3

Normalization of y3 using the bestNormalize package (orderNorm suggested). This is interesting, but it goes beyond the scope of this project.

# Optional: replace the transformed output with the best normalizing
# transformation found by bestNormalize (orderNorm was suggested).
# Skipped unless norm.pred is TRUE.
if (norm.pred == TRUE){
  # NOTE(review): `t` masks base::t() and is left in the global environment;
  # the inverse transform later presumably needs this object — confirm callers.
  t=bestNormalize::bestNormalize(data[[output.var]])
  # NOTE(review): bare `t` inside an if-block is not auto-printed.
  t
  qqnorm(data[[output.var]])
  qqnorm(predict(t))
  data[[output.var.tr]] = predict(t)
}

orderNorm() is a rank-based procedure by which the values of a vector are mapped to their percentile, which is then mapped to the same percentile of the normal distribution. Without the presence of ties, this essentially guarantees that the transformation leads to a uniform distribution

Predictors

Feature Engineering

# Feature engineering. Column creation order is preserved exactly
# (ratios, then logs, then inverse squares) because later code derives the
# predictor list from column order.

# Ratio features: numerator / denominator pairs.
ratio.pairs <- list(c('x2','x1'), c('x6','x5'), c('x9','x7'), c('x10','x8'),
                    c('x14','x12'), c('x15','x13'), c('x17','x16'),
                    c('x19','x18'), c('x21','x20'), c('x23','x22'))
for (pair in ratio.pairs) {
  data[[paste0(pair[1], 'by', pair[2])]] <- data[[pair[1]]] / data[[pair[2]]]
}

# Natural-log transforms (order kept from the original script).
log.vars <- c('x1','x2','x5','x6','x7','x9','x8','x10','x12','x14','x13',
              'x15','x16','x17','x18','x19','x20','x21','x22','x23','x11')
for (v in log.vars) {
  data[[paste0(v, 'log')]] <- log(data[[v]])
}

# Inverse-square transforms.
sqinv.vars <- c('x1','x5','x7','x8','x12','x13','x16','x18','x20','x22')
for (v in sqinv.vars) {
  data[[paste0(v, 'sqinv')]] <- 1 / (data[[v]])^2
}
predictors
##   [1] "x1"      "x2"      "x3"      "x4"      "x5"      "x6"      "x7"      "x8"      "x9"      "x10"     "x11"    
##  [12] "x12"     "x13"     "x14"     "x15"     "x16"     "x17"     "x18"     "x19"     "x20"     "x21"     "x22"    
##  [23] "x23"     "stat1"   "stat2"   "stat3"   "stat4"   "stat5"   "stat6"   "stat7"   "stat8"   "stat9"   "stat10" 
##  [34] "stat11"  "stat12"  "stat13"  "stat14"  "stat15"  "stat16"  "stat17"  "stat18"  "stat19"  "stat20"  "stat21" 
##  [45] "stat22"  "stat23"  "stat24"  "stat25"  "stat26"  "stat27"  "stat28"  "stat29"  "stat30"  "stat31"  "stat32" 
##  [56] "stat33"  "stat34"  "stat35"  "stat36"  "stat37"  "stat38"  "stat39"  "stat40"  "stat41"  "stat42"  "stat43" 
##  [67] "stat44"  "stat45"  "stat46"  "stat47"  "stat48"  "stat49"  "stat50"  "stat51"  "stat52"  "stat53"  "stat54" 
##  [78] "stat55"  "stat56"  "stat57"  "stat58"  "stat59"  "stat60"  "stat61"  "stat62"  "stat63"  "stat64"  "stat65" 
##  [89] "stat66"  "stat67"  "stat68"  "stat69"  "stat70"  "stat71"  "stat72"  "stat73"  "stat74"  "stat75"  "stat76" 
## [100] "stat77"  "stat78"  "stat79"  "stat80"  "stat81"  "stat82"  "stat83"  "stat84"  "stat85"  "stat86"  "stat87" 
## [111] "stat88"  "stat89"  "stat90"  "stat91"  "stat92"  "stat93"  "stat94"  "stat95"  "stat96"  "stat97"  "stat98" 
## [122] "stat99"  "stat100" "stat101" "stat102" "stat103" "stat104" "stat105" "stat106" "stat107" "stat108" "stat109"
## [133] "stat110" "stat111" "stat112" "stat113" "stat114" "stat115" "stat116" "stat117" "stat118" "stat119" "stat120"
## [144] "stat121" "stat122" "stat123" "stat124" "stat125" "stat126" "stat127" "stat128" "stat129" "stat130" "stat131"
## [155] "stat132" "stat133" "stat134" "stat135" "stat136" "stat137" "stat138" "stat139" "stat140" "stat141" "stat142"
## [166] "stat143" "stat144" "stat145" "stat146" "stat147" "stat148" "stat149" "stat150" "stat151" "stat152" "stat153"
## [177] "stat154" "stat155" "stat156" "stat157" "stat158" "stat159" "stat160" "stat161" "stat162" "stat163" "stat164"
## [188] "stat165" "stat166" "stat167" "stat168" "stat169" "stat170" "stat171" "stat172" "stat173" "stat174" "stat175"
## [199] "stat176" "stat177" "stat178" "stat179" "stat180" "stat181" "stat182" "stat183" "stat184" "stat185" "stat186"
## [210] "stat187" "stat188" "stat189" "stat190" "stat191" "stat192" "stat193" "stat194" "stat195" "stat196" "stat197"
## [221] "stat198" "stat199" "stat200" "stat201" "stat202" "stat203" "stat204" "stat205" "stat206" "stat207" "stat208"
## [232] "stat209" "stat210" "stat211" "stat212" "stat213" "stat214" "stat215" "stat216" "stat217"
# Rebuild the predictor list from the (now widened) data columns:
# controlled inputs are the x* columns, observed statistics the stat* columns.
controlled.vars <- grep("^x", colnames(data), value = TRUE)
stat.vars <- grep("^stat", colnames(data), value = TRUE)

predictors <- c(controlled.vars, stat.vars)
predictors
##   [1] "x1"       "x2"       "x3"       "x4"       "x5"       "x6"       "x7"       "x8"       "x9"       "x10"     
##  [11] "x11"      "x12"      "x13"      "x14"      "x15"      "x16"      "x17"      "x18"      "x19"      "x20"     
##  [21] "x21"      "x22"      "x23"      "x2byx1"   "x6byx5"   "x9byx7"   "x10byx8"  "x14byx12" "x15byx13" "x17byx16"
##  [31] "x19byx18" "x21byx20" "x23byx22" "x1log"    "x2log"    "x5log"    "x6log"    "x7log"    "x9log"    "x8log"   
##  [41] "x10log"   "x12log"   "x14log"   "x13log"   "x15log"   "x16log"   "x17log"   "x18log"   "x19log"   "x20log"  
##  [51] "x21log"   "x22log"   "x23log"   "x11log"   "x1sqinv"  "x5sqinv"  "x7sqinv"  "x8sqinv"  "x12sqinv" "x13sqinv"
##  [61] "x16sqinv" "x18sqinv" "x20sqinv" "x22sqinv" "stat1"    "stat2"    "stat3"    "stat4"    "stat5"    "stat6"   
##  [71] "stat7"    "stat8"    "stat9"    "stat10"   "stat11"   "stat12"   "stat13"   "stat14"   "stat15"   "stat16"  
##  [81] "stat17"   "stat18"   "stat19"   "stat20"   "stat21"   "stat22"   "stat23"   "stat24"   "stat25"   "stat26"  
##  [91] "stat27"   "stat28"   "stat29"   "stat30"   "stat31"   "stat32"   "stat33"   "stat34"   "stat35"   "stat36"  
## [101] "stat37"   "stat38"   "stat39"   "stat40"   "stat41"   "stat42"   "stat43"   "stat44"   "stat45"   "stat46"  
## [111] "stat47"   "stat48"   "stat49"   "stat50"   "stat51"   "stat52"   "stat53"   "stat54"   "stat55"   "stat56"  
## [121] "stat57"   "stat58"   "stat59"   "stat60"   "stat61"   "stat62"   "stat63"   "stat64"   "stat65"   "stat66"  
## [131] "stat67"   "stat68"   "stat69"   "stat70"   "stat71"   "stat72"   "stat73"   "stat74"   "stat75"   "stat76"  
## [141] "stat77"   "stat78"   "stat79"   "stat80"   "stat81"   "stat82"   "stat83"   "stat84"   "stat85"   "stat86"  
## [151] "stat87"   "stat88"   "stat89"   "stat90"   "stat91"   "stat92"   "stat93"   "stat94"   "stat95"   "stat96"  
## [161] "stat97"   "stat98"   "stat99"   "stat100"  "stat101"  "stat102"  "stat103"  "stat104"  "stat105"  "stat106" 
## [171] "stat107"  "stat108"  "stat109"  "stat110"  "stat111"  "stat112"  "stat113"  "stat114"  "stat115"  "stat116" 
## [181] "stat117"  "stat118"  "stat119"  "stat120"  "stat121"  "stat122"  "stat123"  "stat124"  "stat125"  "stat126" 
## [191] "stat127"  "stat128"  "stat129"  "stat130"  "stat131"  "stat132"  "stat133"  "stat134"  "stat135"  "stat136" 
## [201] "stat137"  "stat138"  "stat139"  "stat140"  "stat141"  "stat142"  "stat143"  "stat144"  "stat145"  "stat146" 
## [211] "stat147"  "stat148"  "stat149"  "stat150"  "stat151"  "stat152"  "stat153"  "stat154"  "stat155"  "stat156" 
## [221] "stat157"  "stat158"  "stat159"  "stat160"  "stat161"  "stat162"  "stat163"  "stat164"  "stat165"  "stat166" 
## [231] "stat167"  "stat168"  "stat169"  "stat170"  "stat171"  "stat172"  "stat173"  "stat174"  "stat175"  "stat176" 
## [241] "stat177"  "stat178"  "stat179"  "stat180"  "stat181"  "stat182"  "stat183"  "stat184"  "stat185"  "stat186" 
## [251] "stat187"  "stat188"  "stat189"  "stat190"  "stat191"  "stat192"  "stat193"  "stat194"  "stat195"  "stat196" 
## [261] "stat197"  "stat198"  "stat199"  "stat200"  "stat201"  "stat202"  "stat203"  "stat204"  "stat205"  "stat206" 
## [271] "stat207"  "stat208"  "stat209"  "stat210"  "stat211"  "stat212"  "stat213"  "stat214"  "stat215"  "stat216" 
## [281] "stat217"

All predictors show a Fat-Tail situation, where the two tails are very tall, and a low distribution around the mean. The orderNorm transformation can help (see [Best Normalizator] section)

Interesting Predictors

Histograms

# EDA: distributions of a handful of interesting predictors.
if (eda == TRUE){
  cols = c('x11','x18','stat98','x7','stat110')
  df=gather(select_at(data,cols))
  # print() is required here: non-final expressions inside an if-block are
  # not auto-printed, so this plot was silently discarded before.
  print(
    ggplot(df, aes(value)) + 
      geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
      geom_density() + 
      facet_wrap(~key, scales = 'free',ncol=3)
  )
  
  # ggplot(gather(select_at(data,cols)), aes(sample=value)) + 
  #   stat_qq()+
  #   facet_wrap(~key, scales = 'free',ncol=2)
  
  # Per-column summaries; last expression of the if-block, so auto-printed.
  lapply(select_at(data,cols),summary)
}

Scatter plot vs. output variable y3.cuberoot

# EDA: scatter of each interesting predictor vs. the transformed output,
# with a smoother. The ggplot is the if-block's value, so it auto-prints.
# Depends on `cols` set in the previous eda chunk.
if (eda == TRUE){
  d = gather(dplyr::select_at(data,c(cols,output.var.tr)),key=target,value=value,-!!output.var.tr)
  ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
    geom_point(color='light green',alpha=0.5) + 
    geom_smooth() + 
    facet_wrap(~target, scales = 'free',ncol=3)
}

All Predictors

Histograms

All predictors show strong fat-tail behavior

# EDA: histogram + density for every predictor (the plot is the if-block's
# last expression, so it auto-prints when eda is TRUE).
if (eda == TRUE){
  df=gather(select_at(data,predictors))
  ggplot(df, aes(value)) + 
    geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
    geom_density() + 
    # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
    facet_wrap(~key, scales = 'free',ncol=4)
}

Correlations

With Output Variable

# EDA: correlation of every variable with the transformed output, sorted.
if (eda == TRUE){
  #chart.Correlation(select(data,-JobName),  pch=21)
  # https://stackoverflow.com/questions/27034655/how-to-use-dplyrarrangedesc-when-using-a-string-as-column-name
  t=as.data.frame(round(cor(dplyr::select(data,-one_of(output.var.tr,'JobName'))
                            ,select_at(data,output.var.tr)),4))  %>%
    rownames_to_column(var='variable') %>% filter(variable != !!output.var) %>% arrange(-!!sym(output.var.tr))
  #DT::datatable(t)
  message("Top Positive")
  # print() needed: non-final expressions in an if-block are not auto-printed,
  # so the "Top Positive" table was silently dropped before.
  print(kable(head(arrange(t,desc(!!sym(output.var.tr))),20)))
  message("Top Negative")
  print(kable(head(arrange(t,!!sym(output.var.tr)),20)))
}

Between All Variables

# EDA: full correlation matrix between all variables (JobName excluded).
if (eda == TRUE){
  #chart.Correlation(select(data,-JobName),  pch=21)
  # NOTE(review): `t` masks base::t(); it is only used within this block.
  t=as.data.frame(round(cor(dplyr::select(data,-one_of('JobName'))),4))
  #DT::datatable(t,options=list(scrollX=T))
  # Only a 10x10 corner is shown; the full matrix is too large to render.
  message("Showing only 10 variables")
  kable(t[1:10,1:10])
}

Scatter Plots with Output Variable

Scatter plots with all predictors and the output variable (y3.cuberoot)

# EDA: scatter of every predictor vs. the transformed output, with a
# smoother. Last expression of the if-block, so it auto-prints.
if (eda == TRUE){
  d = gather(dplyr::select_at(data,c(predictors,output.var.tr)),key=target,value=value,-!!output.var.tr)
  ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
    geom_point(color='light blue',alpha=0.5) + 
    geom_smooth() + 
    facet_wrap(~target, scales = 'free',ncol=4)
}

Multicollinearity - VIF

No Multicollinearity among predictors

Showing Top predictor by VIF Value

# Multicollinearity check: VIF for every predictor, largest first.
# head(...) is the if-block's last expression, so it auto-prints.
if (eda == TRUE){
  vifDF = usdm::vif(select_at(data,predictors)) %>% arrange(desc(VIF))
  head(vifDF,75)
}

Feature Eng

  • Square Root transformation for x18
# Add a square-root transform of x18; keep both columns for comparison.
data.tr <- mutate(data, x18.sqrt = sqrt(x18))
cols <- c('x18', 'x18.sqrt')

Comparing Pre and Post Transformation Density Plots

# (Density-plot comparison kept for reference.)
# ggplot(gather(select_at(data.tr,cols)), aes(value)) + 
#   geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
#   geom_density() + 
#   facet_wrap(~key, scales = 'free',ncol=4)

# Scatter of x18 and x18.sqrt against the transformed output.
plot.df <- gather(dplyr::select_at(data.tr, c(cols, output.var.tr)),
                  key = target, value = value, -!!output.var.tr)
ggplot(data = plot.df, aes_string(x = 'value', y = output.var.tr)) +
  geom_point(color = 'light blue', alpha = 0.5) +
  geom_smooth() +
  facet_wrap(~target, scales = 'free', ncol = 4)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Drop the JobName key before modeling; every other column is kept.
data.tr <- dplyr::select_at(data.tr, names(data.tr)[!names(data.tr) %in% c('JobName')])

data <- data.tr
label.names <- output.var.tr

Modeling

PCA

# Interaction mode for the PCA design matrix:
#   0 = no interactions
#   1 = full 2-way interactions among ALL predictors
#   2 = 2-way interactions among the controlled (x*) variables only
#   3 = 3-way interactions among the controlled (x*) variables only
InteractionMode = 2

# All modeling columns except the label(s).
pca.vars  = names(data)
pca.vars = pca.vars[!pca.vars %in% label.names]


# http://sshaikh.org/2015/05/06/parallelize-machine-learning-in-r-with-multi-core-cpus/
# Use ~75% of the cores, leaving the rest for other tasks.
# BUG FIX: detectCores()*0.75 is fractional on most core counts (and
# detectCores() can return NA); floor()/max() guarantee a valid worker count.
cl <- makeCluster(max(1, floor(detectCores() * 0.75), na.rm = TRUE))
registerDoParallel(cl)

if(InteractionMode == 1){
  # Full 2-way interactions among all predictors.
  pca.formula =as.formula(paste0('~(',paste0(pca.vars, collapse ='+'),')^2'))
  pca.model = prcomp(formula=pca.formula,data=data[,pca.vars],center=T,scale.=T,retx = T)
  #saveRDS(pca.model,'pca.model.rds')
}
if (InteractionMode == 0){
  # No interactions: PCA straight on the predictor matrix.
  pca.model =  prcomp(x=data[,pca.vars],center=T,scale.=T,retx = T)
}
if (InteractionMode >= 2 && InteractionMode <= 3){
  # Interactions only among the controlled (x*) variables; the stat*
  # columns enter the design matrix without interactions.
  controlled.vars = pca.vars[grep("^x",pca.vars)]
  stat.vars = pca.vars[grep("^stat",pca.vars)]
  
  # Build the interaction term once at the requested degree. (The original
  # always built the ^2 formula and then overwrote it with ^3 for mode 3.)
  degree = if (InteractionMode == 3) ')^3' else ')^2'
  interaction.form = paste0('~(',paste0(controlled.vars, collapse ='+'),degree)
  no.interact.form = paste0(stat.vars, collapse ='+')
  
  pca.formula = as.formula(paste(interaction.form, no.interact.form, sep = "+"))
  pca.model = prcomp(formula=pca.formula,data=data[,pca.vars],center=T,scale.=T,retx = T)
}

stopCluster(cl)
registerDoSEQ() # register sequential engine in case you are not using this function anymore
# Keep enough leading principal components to explain this fraction of the
# total variance.
targetCumVar = .9

# Attach variance bookkeeping to the prcomp object.
pca.model$var = pca.model$sdev ^ 2 #eigenvalues
pca.model$pvar = pca.model$var / sum(pca.model$var) # proportion of variance per PC
pca.model$cumpvar = cumsum(pca.model$pvar ) # cumulative proportion
pca.model$pcaSel = pca.model$cumpvar<=targetCumVar # TRUE for the PCs we keep
pca.model$pcaSelCount = sum(pca.model$pcaSel)
pca.model$pcaSelTotVar = sum(pca.model$pvar[pca.model$pcaSel])
message(pca.model$pcaSelCount, " PCAs justify ",percent(targetCumVar)," of the total Variance. (",percent(pca.model$pcaSelTotVar),")")
## 164 PCAs justify 90.0% of the total Variance. (90.0%)
# NOTE(review): this plots the eigenvalues themselves, not proportions — the
# y-axis label overstates.
plot(pca.model$var,xlab="Principal component", ylab="Proportion of variance explained",   type='b')

plot(cumsum(pca.model$pvar ),xlab="Principal component", ylab="Cumulative Proportion of variance explained", ylim=c(0,1), type='b')

screeplot(pca.model,npcs = pca.model$pcaSelCount)

screeplot(pca.model,npcs = pca.model$pcaSelCount,type='lines')

#summary(pca.model)
#pca.model$rotation
# Build the PCA dataset: label column(s) plus the selected PC scores.
data.pca = dplyr::select(data,!!label.names) %>% 
  dplyr::bind_cols(dplyr::select(as.data.frame(pca.model$x)
                                 ,!!colnames(pca.model$rotation)[pca.model$pcaSel])
  )

Train Test Split

# Shuffle, then split 80/20. caTools::sample.split stratifies on the label.
# NOTE(review): no set.seed() before these calls, so the split is not
# reproducible across runs — confirm this is intended.
data.pca = data.pca[sample(nrow(data.pca)),] # randomly shuffle data
split = sample.split(data.pca[,label.names], SplitRatio = 0.8)

data.train = subset(data.pca, split == TRUE)
data.test = subset(data.pca, split == FALSE)

Common Functions

# Diagnostic plots for a fitted lm-style model: the base plot(model) panels,
# studentized/standardized residual plots, a residual histogram, a leverage
# plot, and a Cook's distance plot.
# Args:
#   model: fitted model supporting resid/rstandard/rstudent/predict.
#   train: the data.frame the model was fitted on.
# Returns: the vector of Cook's distances.
plot.diagnostics <-  function(model, train) {
  plot(model)
  
  r.standard = rstandard(model)
  r.student = rstudent(model)
  
  # Studentized residuals vs. fitted values.
  df = data.frame(x=predict(model,train),y=r.student)
  p=ggplot(data=df,aes(x=x,y=y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_hline(yintercept = 0,size=1)+
    ylab("Student Residuals") +
    xlab("Predicted Values")+
    ggtitle("Student Residual Plot")
  plot(p)
  
  # Standardized residuals vs. fitted values, with +/-2 reference lines.
  # BUG FIX: this plot uses r.standard but was labeled "Student Residuals".
  df = data.frame(x=predict(model,train),y=r.standard)
  p=ggplot(data=df,aes(x=x,y=y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_hline(yintercept = c(-2,0,2),size=1)+
    ylab("Standardized Residuals") +
    xlab("Predicted Values")+
    ggtitle("Standardized Residual Plot")
  plot(p)
  # Histogram of studentized residuals against the standard normal density.
  df=data.frame(r.student)
  p=ggplot(data=df,aes(r.student)) +
    geom_histogram(aes(y=..density..),bins = 50,fill='blue',alpha=0.6) + 
    stat_function(fun = dnorm, n = 100, args = list(mean = 0, sd = 1)) +
    ylab("Density")+
    xlab("Studentized Residuals")+
    ggtitle("Distribution of Studentized Residuals")
  plot(p)
  # http://www.stat.columbia.edu/~martin/W2024/R7.pdf
  # Influence measures (summary deliberately not printed: too much output).
  inf.meas = influence.measures(model)
  
  # Leverage (hat values) by observation index.
  lev = hat(model.matrix(model))
  df=tibble::rownames_to_column(as.data.frame(lev),'id')
  p=ggplot(data=df,aes(x=as.numeric(id),y=lev)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    ylab('Leverage') + 
    xlab('Index')
  plot(p)
  # Cook's distance: reference line at 4/n; only points above 15/n get a
  # text label so the plot stays readable.
  cd = cooks.distance(model)
  df=tibble::rownames_to_column(as.data.frame(cd),'id')
  p=ggplot(data=df,aes(x=as.numeric(id),y=cd)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_text(data=filter(df,cd>15/nrow(train)),aes(label=id),check_overlap=T,size=3,vjust=-.5)+
    ylab('Cooks distances') + 
    geom_hline(yintercept = c(4/nrow(train),0),size=1)+
    xlab('Index')
  plot(p)
  print (paste("Number of data points that have Cook's D > 4/n: ", length(cd[cd > 4/nrow(train)]), sep = "")) 
  print (paste("Number of data points that have Cook's D > 1: ", length(cd[cd > 1]), sep = "")) 
  return(cd)
}

# function to set up random seeds
# Based on http://jaehyeon-kim.github.io/2015/05/Setup-Random-Seeds-on-Caret-Package.html 
# Build the `seeds` list for caret::trainControl so resampling is
# reproducible across runs and parallel workers.
# Based on http://jaehyeon-kim.github.io/2015/05/Setup-Random-Seeds-on-Caret-Package.html
# Args:
#   method:  resampling method; "cv" or "repeatedcv" (anything else -> NULL).
#   numbers: number of folds. repeats: repeats (repeatedcv only).
#   tunes:   tuning-grid size (extra seeds per resample), or NULL.
#   seed:    master seed used to generate the list.
# Returns: list of B integer vectors plus one final single seed, or NULL.
setCaretSeeds <- function(method = "cv", numbers = 1, repeats = 1, tunes = NULL, seed = 1701) {
  # B is the number of resamples; each element holds numbers (+ tunes) seeds.
  B <- if (method == "cv") numbers
  else if(method == "repeatedcv") numbers * repeats
  else NULL
  # BUG FIX: the original tested is.null(length) — `length` is the base
  # function and is never NULL, so the NULL branch was unreachable and
  # unsupported methods crashed inside vector(). Test B instead.
  if(is.null(B)) {
    seeds <- NULL
  } else {
    set.seed(seed = seed)
    seeds <- vector(mode = "list", length = B)
    seeds <- lapply(seeds, function(x) sample.int(n = 1000000
                                                  , size = numbers + ifelse(is.null(tunes), 0, tunes)))
    # caret requires one extra single seed for the final model fit.
    seeds[[length(seeds) + 1]] <- sample.int(n = 1000000, size = 1)
  }
  # return seeds
  seeds
}



# Train a caret model with one of the supported variable-selection /
# shrinkage methods and report its diagnostics.
# Supported methods:
#   * 'leapForward' / 'leapBackward' / 'leapSeq' (leaps subset selection)
#   * 'glmnet' with subopt == 'LASSO'
#   * 'lars'
# Args:
#   formula:       full model formula (the method selects a subset from it).
#   data:          training data.frame.
#   method:        caret method name (see above).
#   subopt:        sub-option, e.g. 'LASSO' for glmnet; may be NULL.
#   feature.names: predictor names, used to size the leap tuning grid.
#   train.control, tune.grid, pre.proc: optional caret overrides; sensible
#                  defaults are built when NULL.
# Returns: a list with the fitted model, the best-model id (leap methods
#   only), and residual/metric plots; contents vary by method.
train.caret.glmselect = function(formula, data, method
                                 ,subopt = NULL, feature.names
                                 , train.control = NULL, tune.grid = NULL, pre.proc = NULL){
  
  # Default: 10-fold CV with fixed seeds so runs are reproducible.
  if(is.null(train.control)){
    train.control <- trainControl(method = "cv"
                              ,number = 10
                              ,seeds = setCaretSeeds(method = "cv"
                                                     , numbers = 10
                                                     , seed = 1701)
                              ,search = "grid"
                              ,verboseIter = TRUE
                              ,allowParallel = TRUE
                              )
  }
  
  # Default tuning grids per method.
  if(is.null(tune.grid)){
    if (method %in% c('leapForward','leapBackward','leapSeq')){
      # Try every subset size up to the full predictor count.
      tune.grid = data.frame(nvmax = 1:length(feature.names))
    }
    # BUG FIX: guard subopt for NULL — `NULL == 'LASSO'` is logical(0) and
    # breaks the && comparison.
    if (method == 'glmnet' && !is.null(subopt) && subopt == 'LASSO'){
      # Will only show 1 Lambda value during training, but that is OK
      # https://stackoverflow.com/questions/47526544/why-need-to-tune-lambda-with-carettrain-method-glmnet-and-cv-glmnet
      # Another option for LASSO is this: https://github.com/topepo/caret/blob/master/RegressionTests/Code/lasso.R
      lambda = 10^seq(-2,0, length =100)
      alpha = c(1)  # alpha = 1 -> pure LASSO penalty
      tune.grid = expand.grid(alpha = alpha,lambda = lambda)
    }
    if (method == 'lars'){
      # https://github.com/topepo/caret/blob/master/RegressionTests/Code/lars.R
      fraction = seq(0, 1, length = 100)
      tune.grid = expand.grid(fraction = fraction)
      pre.proc = c("center", "scale") 
    }
  }
  
  # http://sshaikh.org/2015/05/06/parallelize-machine-learning-in-r-with-multi-core-cpus/
  # Use ~75% of cores; floor()/max() guard against fractional worker counts.
  cl <- makeCluster(max(1, floor(detectCores() * 0.75)))
  registerDoParallel(cl)

  # The seed must be set immediately before caret::train for the resampling
  # to be reproducible.
  set.seed(1) 
  model.caret <- caret::train(formula
                              , data = data
                              , method = method
                              , tuneGrid = tune.grid
                              , trControl = train.control
                              , preProc = pre.proc
                              )
  
  stopCluster(cl)
  registerDoSEQ() # register sequential engine in case you are not using this function anymore
  
  if (method %in% c('leapForward','leapBackward','leapSeq')){
    print("All models results")
    print(model.caret$results) # all model results
    print("Best Model")
    print(model.caret$bestTune) # best model
    model = model.caret$finalModel

    # Metrics Plot 
    dataPlot = model.caret$results %>%
      gather(key='metric',value='value',-nvmax) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=nvmax,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    
    # Residuals Plot
    # leap function does not support studentized residuals
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)
   
    # BUG FIX: a `+` was missing before theme_light(), so the theme was
    # silently dropped (here and in the other two branches below).
    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                                                       , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()
    plot(residHistogram)
    id = rownames(model.caret$bestTune)    
    # regsubsets does not return a full model object, so refit an lm on the
    # selected terms to obtain coefficient confidence intervals.
    # https://stackoverflow.com/questions/13063762/how-to-obtain-a-lm-object-from-regsubsets
    print("Coefficients of final model:")
    coefs <- coef(model, id=id)
    nams <- names(coefs)
    nams <- nams[!nams %in% "(Intercept)"]
    response <-  as.character(formula[[2]])
    form <- as.formula(paste(response, paste(nams, collapse = " + "), sep = " ~ "))
    mod <- lm(form, data = data)
    print(car::Confint(mod))
    return(list(model = model,id = id, residPlot = residPlot, residHistogram=residHistogram
                ,modelLM=mod))
  }
  if (method == 'glmnet' && !is.null(subopt) && subopt == 'LASSO'){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)
    
    print(model.caret$results)
    model=model.caret$finalModel
    # Metrics Plot 
    dataPlot = model.caret$results %>%
      gather(key='metric',value='value',-lambda) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=lambda,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    
    # Residuals Plot 
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)

    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                                                       , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()
    plot(residHistogram)
    
    print("Coefficients") 
    # no intervals for glmnet: https://stackoverflow.com/questions/39750965/confidence-intervals-for-ridge-regression
    t=coef(model,s=model.caret$bestTune$lambda)
    model.coef = t[which(t[,1]!=0),]  # keep non-zero coefficients only
    print(as.data.frame(model.coef))
    id = NULL # not really needed but added for consistency
    return(list(model = model.caret,id = id, residPlot = residPlot, metricsPlot=metricsPlot ))
  }
  if (method == 'lars'){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)
    
    # Metrics Plot
    dataPlot = model.caret$results %>%
        gather(key='metric',value='value',-fraction) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=fraction,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    
    # Residuals Plot
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)

    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                                                       , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()
    plot(residHistogram)
    
    print("Coefficients") 
    t=coef(model.caret$finalModel,s=model.caret$bestTune$fraction,mode='fraction')
    model.coef = t[which(t!=0)]
    print(model.coef)
    id = NULL # not really needed but added for consistency
    return(list(model = model.caret,id = id, residPlot = residPlot, residHistogram=residHistogram))
  }
}

# https://stackoverflow.com/questions/48265743/linear-model-subset-selection-goodness-of-fit-with-k-fold-cross-validation
# changed slightly since call[[2]] was just returning "formula" without actually returnign the value in formula
# Predict from a leaps::regsubsets fit for the model of size `id`.
# The formula must be passed explicitly (object$call[[2]] is unreliable
# when the fit was made through caret).
# https://stackoverflow.com/questions/48265743/linear-model-subset-selection-goodness-of-fit-with-k-fold-cross-validation
predict.regsubsets <- function(object, newdata, id, formula, ...) {
  # Design matrix: adds the intercept and expands interaction/factor terms.
  design <- model.matrix(formula, newdata)
  betas <- coef(object, id = id)
  design[, names(betas)] %*% betas
}
  
# Evaluate a fitted model on the test set: prints MSE/RMSE on both the
# transformed modeling scale and the original scale, and returns an
# actual-vs-predicted plot with tolerance bands.
# Args:
#   model:  fitted model (type depends on `method`).
#   test:   test data.frame.
#   level:  confidence level for predict() when method is NULL.
#   good/ok: relative tolerance bands drawn on the final plot.
#   method/subopt: which training path produced `model` (NULL = plain lm).
#   id/formula/feature.names/label.names: extra info for the per-method
#     predict calls.
#   transformation: bestNormalize object for the inverse transform when
#     norm.pred is TRUE.
# NOTE: log.pred / norm.pred are read from the global environment.
test.model = function(model, test, level=0.95
                      ,draw.limits = FALSE, good = 0.1, ok = 0.15
                      ,method = NULL, subopt = NULL
                      ,id = NULL, formula, feature.names, label.names
                      ,transformation = NULL){
  ## if using caret for glm select equivalent functionality, 
  ## need to pass formula (full is ok as it will select subset of variables from there)

  # BUG FIX: chained with else-if. The original fell through to the
  # `method == '...'` comparisons after the NULL case, which errors when
  # method is NULL (`if` on a zero-length logical). subopt is also guarded.
  if (is.null(method)){
    pred = predict(model, newdata=test, interval="confidence", level = level) 
  } else if (method %in% c('leapForward','leapBackward','leapSeq')){
    pred = predict.regsubsets(model, newdata = test, id = id, formula = formula)
  } else if (method == 'glmnet' && !is.null(subopt) && subopt == 'LASSO'){
    xtest = as.matrix(test[,feature.names]) 
    pred=as.data.frame(predict(model, xtest))
  } else if (method == 'lars'){
    pred=as.data.frame(predict(model, newdata = test))
  }
    
  # Summary of predicted values
  print ("Summary of predicted values: ")
  print(summary(pred[,1]))

  # Error on the (possibly transformed) modeling scale.
  test.mse = mean((test[,label.names]-pred[,1])^2)
  print (paste(method, subopt, "Test MSE:", test.mse, sep=" "))
  
  test.rmse = sqrt(test.mse)
  print (paste(method, subopt, "Test RMSE:", test.rmse, sep=" "))
  
  if(log.pred == TRUE || norm.pred == TRUE){
    # Actual vs. predicted on the transformed scale.
    # BUG FIX: print() is required — a ggplot that is not the function's
    # return value is otherwise silently discarded.
    df=data.frame(x=test[,label.names],y=pred[,1])
    print(
      ggplot(df,aes(x=x,y=y)) +
        geom_point(color='blue',alpha=0.5,shape=20,size=2) +
        geom_abline(slope=1,intercept=0,color='black',size=1) +
        xlab("Actual (Transformed)")+
        ylab("Predicted (Transformed)")
    )
  }
    
  # Back-transform actuals and predictions to the original output scale.
  if (log.pred == FALSE && norm.pred == FALSE){
    x = test[,label.names]
    y = pred[,1]
  }
  if (log.pred == TRUE){
    # the cube-root transform was applied, so cube to invert it
    x = (test[,label.names])^3
    y = (pred[,1])^3
  }
  if (norm.pred == TRUE){
    x = predict(transformation, test[,label.names], inverse = TRUE)
    y = predict(transformation, pred[,1], inverse = TRUE)
  }

  test.mse = mean((x-y)^2)
  print (paste(method, subopt, "Test MSE (Org Scale):", test.mse, sep=" "))
  
  test.rmse = sqrt(test.mse)
  print (paste(method, subopt, "Test RMSE (Org Scale):", test.rmse, sep=" "))

  # Actual vs. predicted on the original scale with +/- good and +/- ok
  # bands. Last expression: returned (auto-printed when called at top level).
  df=data.frame(x,y)
  ggplot(df,aes(x,y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_abline(slope=c(1+good,1-good,1+ok,1-ok)
                ,intercept=rep(0,4),color=c('dark green','dark green','dark red','dark red'),size=1,alpha=0.8) +
    xlab("Actual")+
    ylab("Predicted")
}

Setup Formulae

n <- names(data.train)

# Build "response ~ all remaining columns" from the training-set column names
response.terms  <- paste(n[n %in% label.names], collapse = " + ")
predictor.terms <- paste(n[!n %in% label.names], collapse = " + ")
formula <- as.formula(paste(response.terms, "~", predictor.terms))

# Intercept-only formula for the grand-mean baseline model
grand.mean.formula <- as.formula(paste(response.terms, "~ 1"))

print(formula)
## y3.cuberoot ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + 
##     PC9 + PC10 + PC11 + PC12 + PC13 + PC14 + PC15 + PC16 + PC17 + 
##     PC18 + PC19 + PC20 + PC21 + PC22 + PC23 + PC24 + PC25 + PC26 + 
##     PC27 + PC28 + PC29 + PC30 + PC31 + PC32 + PC33 + PC34 + PC35 + 
##     PC36 + PC37 + PC38 + PC39 + PC40 + PC41 + PC42 + PC43 + PC44 + 
##     PC45 + PC46 + PC47 + PC48 + PC49 + PC50 + PC51 + PC52 + PC53 + 
##     PC54 + PC55 + PC56 + PC57 + PC58 + PC59 + PC60 + PC61 + PC62 + 
##     PC63 + PC64 + PC65 + PC66 + PC67 + PC68 + PC69 + PC70 + PC71 + 
##     PC72 + PC73 + PC74 + PC75 + PC76 + PC77 + PC78 + PC79 + PC80 + 
##     PC81 + PC82 + PC83 + PC84 + PC85 + PC86 + PC87 + PC88 + PC89 + 
##     PC90 + PC91 + PC92 + PC93 + PC94 + PC95 + PC96 + PC97 + PC98 + 
##     PC99 + PC100 + PC101 + PC102 + PC103 + PC104 + PC105 + PC106 + 
##     PC107 + PC108 + PC109 + PC110 + PC111 + PC112 + PC113 + PC114 + 
##     PC115 + PC116 + PC117 + PC118 + PC119 + PC120 + PC121 + PC122 + 
##     PC123 + PC124 + PC125 + PC126 + PC127 + PC128 + PC129 + PC130 + 
##     PC131 + PC132 + PC133 + PC134 + PC135 + PC136 + PC137 + PC138 + 
##     PC139 + PC140 + PC141 + PC142 + PC143 + PC144 + PC145 + PC146 + 
##     PC147 + PC148 + PC149 + PC150 + PC151 + PC152 + PC153 + PC154 + 
##     PC155 + PC156 + PC157 + PC158 + PC159 + PC160 + PC161 + PC162 + 
##     PC163 + PC164
print(grand.mean.formula)
## y3.cuberoot ~ 1
# Refresh feature.names from the training columns, because some features
# may have been transformed/renamed upstream
feature.names <- n[!(n %in% label.names)]

Full Model

# Fit the full OLS model using every available predictor
model.full <- lm(formula, data = data.train)
summary(model.full)
## 
## Call:
## lm(formula = formula, data = data.train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33559 -0.08715 -0.02338  0.06432  0.76105 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  5.001e+00  1.666e-03 3001.612  < 2e-16 ***
## PC1         -1.663e-03  1.446e-04  -11.506  < 2e-16 ***
## PC2         -3.578e-03  1.466e-04  -24.411  < 2e-16 ***
## PC3         -1.544e-03  1.469e-04  -10.511  < 2e-16 ***
## PC4         -1.223e-03  1.495e-04   -8.181 3.49e-16 ***
## PC5          7.730e-04  1.548e-04    4.992 6.15e-07 ***
## PC6         -4.598e-04  1.547e-04   -2.973 0.002961 ** 
## PC7         -7.426e-04  1.583e-04   -4.690 2.80e-06 ***
## PC8         -2.100e-04  1.603e-04   -1.310 0.190123    
## PC9         -1.787e-04  1.655e-04   -1.080 0.280040    
## PC10        -7.822e-05  1.678e-04   -0.466 0.641203    
## PC11        -2.115e-03  1.803e-04  -11.734  < 2e-16 ***
## PC12        -1.758e-03  1.908e-04   -9.213  < 2e-16 ***
## PC13         1.148e-03  1.939e-04    5.923 3.35e-09 ***
## PC14         9.398e-04  2.005e-04    4.687 2.84e-06 ***
## PC15        -1.018e-04  2.040e-04   -0.499 0.617907    
## PC16         1.339e-03  2.050e-04    6.531 7.13e-11 ***
## PC17        -7.998e-04  2.178e-04   -3.673 0.000242 ***
## PC18        -1.469e-03  2.274e-04   -6.460 1.14e-10 ***
## PC19         5.410e-05  2.275e-04    0.238 0.812015    
## PC20         1.721e-03  2.500e-04    6.885 6.42e-12 ***
## PC21         2.477e-04  2.600e-04    0.952 0.340926    
## PC22         5.364e-04  4.069e-04    1.318 0.187420    
## PC23         1.301e-03  5.015e-04    2.594 0.009507 ** 
## PC24        -3.279e-03  5.897e-04   -5.561 2.81e-08 ***
## PC25         1.098e-03  6.577e-04    1.670 0.094941 .  
## PC26         1.433e-03  6.767e-04    2.118 0.034233 *  
## PC27         1.618e-03  6.810e-04    2.377 0.017497 *  
## PC28         5.896e-04  6.919e-04    0.852 0.394178    
## PC29         1.544e-03  7.568e-04    2.040 0.041401 *  
## PC30         3.520e-04  7.711e-04    0.457 0.648001    
## PC31        -9.859e-04  8.310e-04   -1.186 0.235497    
## PC32        -2.748e-03  8.393e-04   -3.274 0.001068 ** 
## PC33         1.251e-03  8.585e-04    1.457 0.145139    
## PC34         4.009e-03  9.073e-04    4.418 1.01e-05 ***
## PC35         2.322e-04  9.631e-04    0.241 0.809491    
## PC36        -2.440e-04  9.771e-04   -0.250 0.802821    
## PC37        -1.401e-03  1.016e-03   -1.379 0.167872    
## PC38         1.095e-03  1.057e-03    1.036 0.300123    
## PC39        -5.246e-04  1.073e-03   -0.489 0.624800    
## PC40        -3.557e-04  1.078e-03   -0.330 0.741390    
## PC41         2.163e-04  1.099e-03    0.197 0.844009    
## PC42         1.854e-04  1.112e-03    0.167 0.867517    
## PC43         2.383e-04  1.127e-03    0.211 0.832595    
## PC44         1.721e-03  1.122e-03    1.534 0.125092    
## PC45        -1.304e-03  1.124e-03   -1.160 0.246013    
## PC46         2.240e-04  1.142e-03    0.196 0.844474    
## PC47        -1.670e-03  1.149e-03   -1.454 0.146054    
## PC48         1.224e-03  1.172e-03    1.045 0.296123    
## PC49         1.122e-03  1.174e-03    0.956 0.339245    
## PC50        -6.369e-04  1.183e-03   -0.538 0.590299    
## PC51         7.903e-04  1.190e-03    0.664 0.506781    
## PC52         1.773e-05  1.195e-03    0.015 0.988165    
## PC53         2.587e-04  1.197e-03    0.216 0.828970    
## PC54        -2.819e-04  1.205e-03   -0.234 0.815016    
## PC55        -2.113e-04  1.206e-03   -0.175 0.860959    
## PC56         6.500e-04  1.226e-03    0.530 0.595969    
## PC57        -1.623e-03  1.234e-03   -1.315 0.188672    
## PC58         8.245e-04  1.228e-03    0.672 0.501823    
## PC59         2.455e-03  1.219e-03    2.013 0.044130 *  
## PC60        -1.304e-03  1.245e-03   -1.047 0.295265    
## PC61         1.976e-04  1.231e-03    0.160 0.872529    
## PC62        -1.822e-04  1.256e-03   -0.145 0.884688    
## PC63        -2.238e-03  1.255e-03   -1.783 0.074588 .  
## PC64        -1.993e-03  1.261e-03   -1.581 0.114014    
## PC65        -1.148e-03  1.257e-03   -0.913 0.361358    
## PC66        -2.351e-03  1.275e-03   -1.844 0.065179 .  
## PC67        -2.757e-04  1.287e-03   -0.214 0.830429    
## PC68         2.589e-03  1.284e-03    2.017 0.043774 *  
## PC69         1.592e-03  1.291e-03    1.233 0.217541    
## PC70        -4.740e-04  1.297e-03   -0.365 0.714790    
## PC71         2.825e-03  1.291e-03    2.187 0.028753 *  
## PC72         7.206e-05  1.305e-03    0.055 0.955951    
## PC73         7.575e-04  1.313e-03    0.577 0.564079    
## PC74        -1.595e-03  1.318e-03   -1.210 0.226182    
## PC75        -3.415e-03  1.325e-03   -2.578 0.009967 ** 
## PC76         3.557e-04  1.326e-03    0.268 0.788493    
## PC77         1.765e-03  1.326e-03    1.331 0.183370    
## PC78         1.473e-03  1.331e-03    1.106 0.268645    
## PC79         2.454e-03  1.344e-03    1.825 0.067987 .  
## PC80        -1.258e-03  1.367e-03   -0.920 0.357415    
## PC81         3.799e-03  1.363e-03    2.788 0.005324 ** 
## PC82         6.021e-04  1.369e-03    0.440 0.660201    
## PC83        -2.566e-03  1.360e-03   -1.886 0.059316 .  
## PC84         3.514e-03  1.368e-03    2.568 0.010254 *  
## PC85         4.670e-03  1.389e-03    3.363 0.000777 ***
## PC86        -2.149e-03  1.384e-03   -1.552 0.120711    
## PC87         8.014e-03  1.402e-03    5.716 1.15e-08 ***
## PC88        -1.912e-03  1.422e-03   -1.344 0.178907    
## PC89        -2.577e-03  1.398e-03   -1.843 0.065369 .  
## PC90        -2.048e-03  1.405e-03   -1.458 0.144946    
## PC91        -1.752e-04  1.411e-03   -0.124 0.901196    
## PC92        -2.535e-04  1.423e-03   -0.178 0.858631    
## PC93        -4.790e-04  1.408e-03   -0.340 0.733639    
## PC94        -3.779e-03  1.418e-03   -2.665 0.007724 ** 
## PC95        -5.870e-04  1.430e-03   -0.411 0.681400    
## PC96        -3.851e-03  1.438e-03   -2.677 0.007444 ** 
## PC97        -1.783e-03  1.430e-03   -1.246 0.212653    
## PC98        -9.938e-04  1.436e-03   -0.692 0.489052    
## PC99        -2.122e-03  1.437e-03   -1.476 0.139911    
## PC100       -2.253e-04  1.436e-03   -0.157 0.875337    
## PC101       -3.588e-04  1.435e-03   -0.250 0.802572    
## PC102       -2.381e-03  1.457e-03   -1.634 0.102317    
## PC103        2.814e-03  1.445e-03    1.948 0.051496 .  
## PC104       -3.746e-03  1.451e-03   -2.581 0.009866 ** 
## PC105        2.963e-03  1.456e-03    2.035 0.041918 *  
## PC106        3.630e-03  1.450e-03    2.504 0.012315 *  
## PC107        1.162e-03  1.453e-03    0.800 0.423977    
## PC108        2.455e-05  1.462e-03    0.017 0.986606    
## PC109        2.111e-03  1.460e-03    1.446 0.148280    
## PC110       -5.179e-04  1.455e-03   -0.356 0.721947    
## PC111       -3.337e-03  1.470e-03   -2.270 0.023220 *  
## PC112       -4.860e-04  1.470e-03   -0.331 0.740945    
## PC113        1.610e-03  1.472e-03    1.093 0.274225    
## PC114       -2.852e-03  1.469e-03   -1.941 0.052266 .  
## PC115       -5.796e-03  1.480e-03   -3.917 9.08e-05 ***
## PC116       -6.925e-04  1.479e-03   -0.468 0.639695    
## PC117       -2.214e-04  1.470e-03   -0.151 0.880275    
## PC118        2.331e-03  1.486e-03    1.568 0.116913    
## PC119       -2.920e-03  1.485e-03   -1.966 0.049317 *  
## PC120        1.033e-03  1.483e-03    0.697 0.485922    
## PC121       -1.211e-05  1.484e-03   -0.008 0.993489    
## PC122        2.594e-03  1.494e-03    1.736 0.082572 .  
## PC123       -2.418e-03  1.498e-03   -1.614 0.106511    
## PC124        1.651e-03  1.501e-03    1.100 0.271557    
## PC125        2.036e-03  1.506e-03    1.353 0.176270    
## PC126        5.228e-04  1.492e-03    0.350 0.726015    
## PC127        2.308e-03  1.489e-03    1.550 0.121148    
## PC128       -1.919e-03  1.501e-03   -1.278 0.201285    
## PC129       -1.535e-04  1.503e-03   -0.102 0.918649    
## PC130        5.811e-04  1.516e-03    0.383 0.701498    
## PC131       -3.983e-03  1.510e-03   -2.638 0.008357 ** 
## PC132        1.160e-03  1.509e-03    0.768 0.442299    
## PC133       -1.385e-03  1.515e-03   -0.914 0.360657    
## PC134        4.552e-03  1.504e-03    3.027 0.002478 ** 
## PC135        3.018e-03  1.510e-03    1.999 0.045683 *  
## PC136        1.191e-03  1.526e-03    0.780 0.435280    
## PC137       -8.056e-04  1.525e-03   -0.528 0.597326    
## PC138        1.818e-03  1.530e-03    1.188 0.234897    
## PC139       -3.505e-03  1.521e-03   -2.304 0.021268 *  
## PC140       -1.269e-03  1.539e-03   -0.825 0.409519    
## PC141        1.223e-05  1.534e-03    0.008 0.993642    
## PC142        1.458e-05  1.539e-03    0.009 0.992443    
## PC143        2.076e-03  1.538e-03    1.350 0.177153    
## PC144        2.493e-03  1.535e-03    1.625 0.104270    
## PC145        1.769e-03  1.541e-03    1.148 0.251143    
## PC146        4.674e-03  1.547e-03    3.021 0.002530 ** 
## PC147       -9.552e-04  1.537e-03   -0.621 0.534354    
## PC148       -2.039e-03  1.530e-03   -1.333 0.182750    
## PC149        6.513e-04  1.553e-03    0.419 0.674907    
## PC150        8.207e-04  1.550e-03    0.529 0.596518    
## PC151        2.957e-03  1.556e-03    1.900 0.057473 .  
## PC152       -4.070e-04  1.565e-03   -0.260 0.794780    
## PC153        3.365e-03  1.550e-03    2.171 0.029948 *  
## PC154       -3.574e-03  1.557e-03   -2.295 0.021747 *  
## PC155        3.279e-03  1.559e-03    2.103 0.035469 *  
## PC156        3.411e-03  1.564e-03    2.181 0.029194 *  
## PC157        8.113e-04  1.568e-03    0.517 0.604866    
## PC158        7.014e-04  1.567e-03    0.448 0.654499    
## PC159        5.705e-03  1.559e-03    3.659 0.000255 ***
## PC160        9.694e-04  1.556e-03    0.623 0.533269    
## PC161        1.538e-03  1.559e-03    0.986 0.324207    
## PC162       -5.154e-03  1.579e-03   -3.265 0.001102 ** 
## PC163        2.629e-03  1.574e-03    1.670 0.094962 .  
## PC164        5.116e-04  1.568e-03    0.326 0.744258    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1241 on 5419 degrees of freedom
## Multiple R-squared:  0.2498, Adjusted R-squared:  0.2271 
## F-statistic:    11 on 164 and 5419 DF,  p-value: < 2.2e-16
cd.full = plot.diagnostics(model=model.full, train=data.train)

## [1] "Number of data points that have Cook's D > 4/n: 270"
## [1] "Number of data points that have Cook's D > 1: 0"

Checking with removal of high influence points

# Flag observations with Cook's distance above the common 4/n cutoff
high.cd <- names(cd.full[cd.full > 4 / nrow(data.train)])

# Save dataset with high.cd flagged
# NOTE(review): `t` shadows base::t(); it is only used for the optional export
t <- data.train %>%
  rownames_to_column() %>%
  mutate(high.cd = ifelse(rowname %in% high.cd, 1, 0))
#write.csv(t,file='data_high_cd_flag.csv',row.names = F)
###
# Refit the full model with the high-influence rows removed
# (note: %in% binds tighter than !, so this negates the membership test)
data.train2 <- data.train[!(rownames(data.train) %in% high.cd), ]
model.full2 <- lm(formula, data = data.train2)
summary(model.full2)
## 
## Call:
## lm(formula = formula, data = data.train2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.21882 -0.07427 -0.01502  0.06494  0.32144 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  4.987e+00  1.374e-03 3629.373  < 2e-16 ***
## PC1         -1.809e-03  1.218e-04  -14.848  < 2e-16 ***
## PC2         -3.501e-03  1.215e-04  -28.824  < 2e-16 ***
## PC3         -1.565e-03  1.224e-04  -12.792  < 2e-16 ***
## PC4         -1.386e-03  1.234e-04  -11.230  < 2e-16 ***
## PC5          7.595e-04  1.286e-04    5.907 3.70e-09 ***
## PC6         -3.108e-04  1.281e-04   -2.425 0.015330 *  
## PC7         -7.746e-04  1.310e-04   -5.915 3.54e-09 ***
## PC8         -1.928e-04  1.332e-04   -1.448 0.147764    
## PC9         -1.503e-05  1.373e-04   -0.109 0.912827    
## PC10         2.024e-05  1.392e-04    0.145 0.884340    
## PC11        -2.387e-03  1.488e-04  -16.040  < 2e-16 ***
## PC12        -1.838e-03  1.572e-04  -11.696  < 2e-16 ***
## PC13         1.124e-03  1.605e-04    7.001 2.87e-12 ***
## PC14         8.099e-04  1.654e-04    4.897 1.00e-06 ***
## PC15        -2.948e-04  1.691e-04   -1.743 0.081348 .  
## PC16         1.194e-03  1.689e-04    7.067 1.79e-12 ***
## PC17        -8.941e-04  1.794e-04   -4.982 6.49e-07 ***
## PC18        -1.442e-03  1.871e-04   -7.705 1.56e-14 ***
## PC19         9.940e-05  1.886e-04    0.527 0.598125    
## PC20         1.808e-03  2.067e-04    8.749  < 2e-16 ***
## PC21         2.458e-04  2.147e-04    1.145 0.252434    
## PC22         7.010e-04  3.356e-04    2.089 0.036764 *  
## PC23         1.443e-03  4.202e-04    3.433 0.000601 ***
## PC24        -3.477e-03  4.894e-04   -7.105 1.37e-12 ***
## PC25         1.354e-03  5.480e-04    2.471 0.013490 *  
## PC26         7.814e-04  5.624e-04    1.389 0.164773    
## PC27         1.204e-03  5.664e-04    2.126 0.033543 *  
## PC28         1.645e-04  5.745e-04    0.286 0.774609    
## PC29         1.517e-03  6.249e-04    2.428 0.015198 *  
## PC30         4.725e-04  6.420e-04    0.736 0.461754    
## PC31        -1.071e-03  6.916e-04   -1.549 0.121499    
## PC32        -2.897e-03  6.954e-04   -4.166 3.16e-05 ***
## PC33         9.478e-05  7.179e-04    0.132 0.894967    
## PC34         4.075e-03  7.486e-04    5.444 5.45e-08 ***
## PC35         4.622e-04  8.049e-04    0.574 0.565869    
## PC36        -1.101e-03  8.141e-04   -1.352 0.176441    
## PC37        -1.679e-03  8.415e-04   -1.996 0.046031 *  
## PC38         1.381e-03  8.742e-04    1.580 0.114220    
## PC39        -3.493e-05  9.325e-04   -0.037 0.970122    
## PC40        -3.500e-04  9.024e-04   -0.388 0.698116    
## PC41        -4.850e-04  9.136e-04   -0.531 0.595545    
## PC42         1.448e-03  9.301e-04    1.557 0.119454    
## PC43         9.782e-04  9.487e-04    1.031 0.302568    
## PC44         6.531e-04  9.541e-04    0.685 0.493663    
## PC45        -1.243e-03  9.428e-04   -1.319 0.187379    
## PC46         2.784e-04  9.507e-04    0.293 0.769670    
## PC47        -1.762e-03  9.627e-04   -1.830 0.067288 .  
## PC48         9.954e-04  9.743e-04    1.022 0.306986    
## PC49         2.300e-03  9.834e-04    2.338 0.019407 *  
## PC50        -1.327e-03  9.956e-04   -1.333 0.182568    
## PC51         1.178e-03  1.003e-03    1.174 0.240539    
## PC52        -3.593e-04  9.989e-04   -0.360 0.719063    
## PC53         1.665e-03  9.962e-04    1.672 0.094615 .  
## PC54        -6.463e-04  1.014e-03   -0.638 0.523801    
## PC55        -1.977e-03  1.012e-03   -1.953 0.050902 .  
## PC56         3.395e-04  1.030e-03    0.330 0.741771    
## PC57        -2.451e-03  1.026e-03   -2.389 0.016943 *  
## PC58        -1.341e-03  1.030e-03   -1.302 0.193055    
## PC59         2.614e-03  1.024e-03    2.552 0.010736 *  
## PC60        -1.879e-03  1.042e-03   -1.803 0.071460 .  
## PC61        -5.407e-04  1.025e-03   -0.528 0.597735    
## PC62         6.312e-04  1.054e-03    0.599 0.549249    
## PC63        -2.096e-03  1.052e-03   -1.992 0.046431 *  
## PC64        -1.793e-03  1.052e-03   -1.705 0.088246 .  
## PC65        -1.661e-03  1.052e-03   -1.578 0.114614    
## PC66        -1.570e-03  1.071e-03   -1.466 0.142662    
## PC67        -1.365e-04  1.076e-03   -0.127 0.899076    
## PC68         1.919e-03  1.074e-03    1.787 0.073964 .  
## PC69         2.473e-03  1.085e-03    2.279 0.022718 *  
## PC70         3.574e-04  1.077e-03    0.332 0.740126    
## PC71         2.018e-03  1.073e-03    1.881 0.060040 .  
## PC72        -1.189e-04  1.084e-03   -0.110 0.912685    
## PC73         1.102e-03  1.094e-03    1.008 0.313655    
## PC74        -5.347e-05  1.101e-03   -0.049 0.961255    
## PC75        -2.533e-03  1.106e-03   -2.290 0.022038 *  
## PC76        -4.901e-04  1.099e-03   -0.446 0.655659    
## PC77         1.877e-03  1.101e-03    1.704 0.088400 .  
## PC78        -9.855e-05  1.107e-03   -0.089 0.929098    
## PC79         3.123e-03  1.118e-03    2.795 0.005214 ** 
## PC80        -7.226e-04  1.129e-03   -0.640 0.522150    
## PC81         4.163e-03  1.128e-03    3.689 0.000227 ***
## PC82         2.091e-04  1.136e-03    0.184 0.853915    
## PC83        -2.914e-03  1.136e-03   -2.565 0.010355 *  
## PC84         3.204e-03  1.136e-03    2.820 0.004819 ** 
## PC85         5.224e-03  1.160e-03    4.505 6.79e-06 ***
## PC86        -7.154e-04  1.148e-03   -0.623 0.533092    
## PC87         7.390e-03  1.162e-03    6.361 2.17e-10 ***
## PC88        -1.939e-03  1.181e-03   -1.643 0.100507    
## PC89        -2.070e-03  1.159e-03   -1.785 0.074296 .  
## PC90        -1.476e-03  1.169e-03   -1.263 0.206738    
## PC91         7.141e-05  1.163e-03    0.061 0.951027    
## PC92         9.746e-04  1.173e-03    0.831 0.406269    
## PC93        -1.545e-03  1.168e-03   -1.322 0.186179    
## PC94        -2.938e-03  1.173e-03   -2.503 0.012329 *  
## PC95        -1.976e-04  1.186e-03   -0.167 0.867716    
## PC96        -3.359e-03  1.188e-03   -2.828 0.004709 ** 
## PC97        -1.164e-03  1.185e-03   -0.982 0.326024    
## PC98        -6.647e-04  1.189e-03   -0.559 0.576069    
## PC99        -6.694e-04  1.194e-03   -0.561 0.575057    
## PC100       -9.248e-04  1.182e-03   -0.783 0.433896    
## PC101       -1.182e-03  1.188e-03   -0.995 0.319678    
## PC102       -1.700e-03  1.206e-03   -1.410 0.158568    
## PC103        2.440e-03  1.196e-03    2.040 0.041400 *  
## PC104       -3.327e-03  1.197e-03   -2.779 0.005464 ** 
## PC105        2.911e-03  1.208e-03    2.409 0.016010 *  
## PC106        3.239e-03  1.197e-03    2.706 0.006838 ** 
## PC107        1.382e-03  1.210e-03    1.142 0.253523    
## PC108       -8.645e-04  1.209e-03   -0.715 0.474654    
## PC109        1.722e-03  1.207e-03    1.426 0.153872    
## PC110       -4.221e-04  1.202e-03   -0.351 0.725557    
## PC111       -3.743e-03  1.218e-03   -3.074 0.002121 ** 
## PC112       -4.776e-04  1.215e-03   -0.393 0.694299    
## PC113        1.859e-03  1.214e-03    1.531 0.125723    
## PC114       -2.621e-03  1.214e-03   -2.159 0.030930 *  
## PC115       -6.776e-03  1.223e-03   -5.540 3.17e-08 ***
## PC116       -1.748e-04  1.222e-03   -0.143 0.886254    
## PC117        4.638e-04  1.215e-03    0.382 0.702773    
## PC118        7.335e-04  1.227e-03    0.598 0.550098    
## PC119       -2.160e-03  1.226e-03   -1.762 0.078197 .  
## PC120        1.097e-03  1.228e-03    0.893 0.371752    
## PC121       -1.600e-03  1.223e-03   -1.308 0.190982    
## PC122        2.408e-03  1.234e-03    1.951 0.051071 .  
## PC123       -1.912e-03  1.237e-03   -1.545 0.122343    
## PC124        2.971e-04  1.244e-03    0.239 0.811165    
## PC125        2.970e-03  1.242e-03    2.391 0.016821 *  
## PC126        7.033e-04  1.234e-03    0.570 0.568623    
## PC127        1.276e-03  1.230e-03    1.037 0.299691    
## PC128       -2.061e-03  1.238e-03   -1.665 0.095898 .  
## PC129       -1.659e-04  1.249e-03   -0.133 0.894302    
## PC130        5.294e-04  1.253e-03    0.423 0.672555    
## PC131       -3.047e-03  1.245e-03   -2.447 0.014443 *  
## PC132        3.191e-04  1.250e-03    0.255 0.798434    
## PC133       -1.188e-03  1.262e-03   -0.941 0.346660    
## PC134        2.908e-03  1.243e-03    2.340 0.019306 *  
## PC135        1.695e-03  1.247e-03    1.360 0.174042    
## PC136        1.668e-03  1.266e-03    1.318 0.187609    
## PC137       -1.638e-03  1.258e-03   -1.302 0.192921    
## PC138        1.992e-03  1.269e-03    1.570 0.116538    
## PC139       -2.933e-03  1.260e-03   -2.328 0.019936 *  
## PC140       -1.559e-03  1.271e-03   -1.226 0.220256    
## PC141        1.003e-03  1.270e-03    0.790 0.429429    
## PC142        1.327e-03  1.271e-03    1.045 0.296258    
## PC143        1.950e-03  1.270e-03    1.536 0.124609    
## PC144        1.772e-03  1.271e-03    1.394 0.163457    
## PC145        3.087e-03  1.277e-03    2.418 0.015639 *  
## PC146        5.987e-03  1.274e-03    4.698 2.70e-06 ***
## PC147       -1.116e-03  1.275e-03   -0.875 0.381405    
## PC148       -1.835e-03  1.261e-03   -1.455 0.145785    
## PC149        7.123e-05  1.280e-03    0.056 0.955640    
## PC150        1.585e-03  1.284e-03    1.235 0.216894    
## PC151        2.715e-03  1.282e-03    2.119 0.034179 *  
## PC152       -5.475e-04  1.295e-03   -0.423 0.672471    
## PC153        2.200e-03  1.277e-03    1.722 0.085101 .  
## PC154       -2.471e-03  1.291e-03   -1.915 0.055561 .  
## PC155        2.414e-03  1.283e-03    1.882 0.059949 .  
## PC156        1.955e-03  1.294e-03    1.511 0.130768    
## PC157        1.366e-03  1.298e-03    1.053 0.292529    
## PC158        6.729e-04  1.301e-03    0.517 0.604969    
## PC159        4.557e-03  1.284e-03    3.549 0.000391 ***
## PC160        5.971e-04  1.296e-03    0.461 0.645090    
## PC161       -6.825e-04  1.288e-03   -0.530 0.596279    
## PC162       -6.019e-03  1.303e-03   -4.620 3.94e-06 ***
## PC163        2.291e-03  1.307e-03    1.754 0.079558 .  
## PC164        4.710e-04  1.295e-03    0.364 0.716096    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09981 on 5149 degrees of freedom
## Multiple R-squared:  0.3423, Adjusted R-squared:  0.3213 
## F-statistic: 16.34 on 164 and 5149 DF,  p-value: < 2.2e-16
cd.full2 = plot.diagnostics(model.full2, data.train2)

## [1] "Number of data points that have Cook's D > 4/n: 240"
## [1] "Number of data points that have Cook's D > 1: 0"
# much more normal residuals than before. 
# Checking to see if distributions are different and if so which variables
# High Leverage Plot 
# Compare the target distribution for high-influence vs remaining points
plotData <- data.train %>%
  rownames_to_column() %>%
  mutate(type = ifelse(rowname %in% high.cd, 'High', 'Normal')) %>%
  dplyr::select(type, target = one_of(label.names))

ggplot(plotData, aes(x = type, y = target)) +
  geom_boxplot(fill = 'light blue', outlier.shape = NA) +
  scale_y_continuous(name = "Target Variable Values",
                     label = scales::comma_format(accuracy = .1)) +
  theme_light() +
  ggtitle('Distribution of High Leverage Points and Normal  Points')

# 2 sample t-tests

# Two-sample t-tests: does each feature's distribution differ between the
# high-influence group and the normal group?
plotData <- data.train %>%
  rownames_to_column() %>%
  mutate(type = ifelse(rowname %in% high.cd, 'High', 'Normal')) %>%
  dplyr::select(type, one_of(feature.names))

comp.test <- lapply(dplyr::select(plotData, one_of(feature.names)),
                    function(x) t.test(x ~ plotData$type, var.equal = TRUE))

# Keep only features whose group means differ at the 5% level
sig.comp <- list.filter(comp.test, p.value < 0.05)
sapply(sig.comp, function(x) x[['p.value']])
##          PC1          PC6         PC11         PC23         PC25         PC26         PC28         PC33         PC39 
## 1.858533e-06 3.464657e-02 7.293096e-04 4.145808e-05 1.038696e-03 3.260100e-04 2.978237e-02 2.013420e-02 1.440239e-02 
##         PC44         PC45         PC55         PC75         PC78         PC87        PC124        PC134        PC159 
## 1.095256e-02 8.523271e-04 2.904629e-02 3.300027e-02 2.014597e-02 3.646907e-02 4.622152e-02 2.571908e-02 3.677550e-02 
##        PC161 
## 1.455333e-02
# Boxplots restricted to the significantly different features, split by group
mm <- melt(plotData, id = c('type')) %>%
  filter(variable %in% names(sig.comp))

ggplot(mm, aes(x = type, y = value)) +
  geom_boxplot() +
  facet_wrap(~variable, ncol = 5, scales = 'free_y') +
  scale_y_continuous(name = "values", label = scales::comma_format(accuracy = .1)) +
  ggtitle('Distribution of High Leverage Points and Normal Points')

# Distribution (box) Plots — all features this time, split by group
mm <- melt(plotData, id = c('type'))

ggplot(mm, aes(x = type, y = value)) +
  geom_boxplot() +
  facet_wrap(~variable, ncol = 8, scales = 'free_y') +
  scale_y_continuous(name = "values", label = scales::comma_format(accuracy = .1)) +
  ggtitle('Distribution of High Leverage Points and Normal Points')

Grand Means Model

# Intercept-only baseline model: predicts the grand mean of the response
model.null <- lm(grand.mean.formula, data = data.train)
summary(model.null)
## 
## Call:
## lm(formula = grand.mean.formula, data = data.train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.42417 -0.09356 -0.01542  0.07840  0.78460 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.001635   0.001889    2647   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1412 on 5583 degrees of freedom

Variable Selection

Basic: http://www.stat.columbia.edu/~martin/W2024/R10.pdf Cross Validation + Other Metrics: http://www.sthda.com/english/articles/37-model-selection-essentials-in-r/154-stepwise-regression-essentials-in-r/

Forward Selection with CV

Train

if (algo.forward.caret == TRUE){
  set.seed(1)  # reproducible CV folds
  # Forward stepwise selection via caret's leapForward wrapper
  returned <- train.caret.glmselect(formula = formula,
                                    data = data.train,
                                    method = "leapForward",
                                    feature.names = feature.names)
  model.forward <- returned$model
  id <- returned$id  # selected model size (nvmax)
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 105 on full training set
## [1] "All models results"
##     nvmax      RMSE   Rsquared        MAE      RMSESD RsquaredSD       MAESD
## 1       1 0.1350468 0.08479660 0.10436184 0.005200049 0.01674113 0.002954202
## 2       2 0.1339628 0.09935862 0.10371363 0.005504447 0.01832821 0.003125743
## 3       3 0.1323635 0.12089088 0.10239954 0.005815779 0.02088009 0.003384538
## 4       4 0.1318132 0.12800310 0.10189097 0.005452716 0.01669327 0.003230049
## 5       5 0.1304285 0.14641271 0.10072021 0.005540135 0.01964057 0.003224118
## 6       6 0.1297260 0.15545312 0.10010572 0.005332458 0.01959561 0.002999176
## 7       7 0.1296458 0.15676998 0.10011934 0.005408872 0.01921091 0.003198788
## 8       8 0.1292860 0.16166820 0.09983246 0.005666917 0.02223766 0.003270292
## 9       9 0.1288372 0.16765598 0.09957128 0.005687598 0.02264859 0.003320170
## 10     10 0.1283193 0.17429662 0.09917594 0.005966020 0.02433558 0.003541938
## 11     11 0.1279362 0.17921957 0.09888127 0.005983356 0.02473557 0.003442295
## 12     12 0.1276569 0.18260370 0.09866195 0.005781548 0.02152010 0.003269757
## 13     13 0.1272739 0.18759117 0.09845644 0.005768872 0.02351046 0.003179434
## 14     14 0.1271615 0.18912503 0.09835234 0.005751381 0.02366383 0.003130328
## 15     15 0.1268642 0.19307028 0.09806442 0.005948943 0.02602667 0.003223252
## 16     16 0.1266694 0.19544374 0.09789322 0.005856941 0.02430375 0.003202948
## 17     17 0.1264706 0.19794585 0.09769118 0.005836174 0.02458856 0.003211562
## 18     18 0.1264844 0.19784441 0.09761652 0.005839495 0.02497885 0.003228483
## 19     19 0.1264275 0.19857322 0.09763926 0.005815373 0.02587255 0.003356488
## 20     20 0.1263052 0.20012438 0.09754751 0.005853704 0.02690377 0.003357624
## 21     21 0.1263884 0.19900583 0.09752871 0.005791377 0.02509730 0.003267652
## 22     22 0.1263051 0.20012161 0.09751512 0.005871821 0.02659607 0.003378924
## 23     23 0.1263833 0.19920885 0.09756776 0.005879425 0.02633925 0.003369958
## 24     24 0.1264223 0.19868676 0.09764942 0.005783628 0.02439144 0.003254422
## 25     25 0.1265097 0.19760003 0.09770993 0.005654214 0.02347045 0.003139757
## 26     26 0.1263927 0.19910993 0.09762567 0.005763593 0.02351943 0.003075068
## 27     27 0.1263814 0.19939537 0.09763183 0.005665547 0.02346896 0.003028282
## 28     28 0.1264759 0.19825074 0.09773256 0.005612793 0.02317772 0.002975545
## 29     29 0.1265454 0.19753824 0.09779892 0.005725031 0.02481153 0.003079489
## 30     30 0.1265390 0.19769106 0.09782618 0.005719897 0.02490035 0.003116986
## 31     31 0.1265541 0.19751539 0.09775613 0.005613531 0.02382063 0.003060166
## 32     32 0.1265618 0.19748093 0.09775847 0.005632815 0.02439437 0.003037837
## 33     33 0.1266044 0.19708571 0.09776977 0.005623306 0.02367647 0.002977068
## 34     34 0.1266824 0.19630060 0.09779546 0.005561872 0.02369771 0.002961893
## 35     35 0.1267466 0.19550167 0.09788808 0.005532787 0.02223223 0.002938956
## 36     36 0.1267695 0.19528058 0.09793055 0.005501084 0.02251962 0.002907074
## 37     37 0.1267451 0.19557827 0.09790324 0.005454463 0.02223763 0.002888294
## 38     38 0.1267693 0.19534713 0.09797083 0.005391634 0.02254449 0.002786412
## 39     39 0.1266653 0.19656588 0.09788934 0.005448440 0.02266691 0.002864593
## 40     40 0.1266227 0.19715789 0.09789099 0.005471754 0.02366100 0.002851723
## 41     41 0.1266599 0.19673873 0.09790957 0.005442954 0.02262700 0.002774776
## 42     42 0.1266683 0.19672580 0.09792747 0.005549211 0.02294082 0.002831955
## 43     43 0.1266416 0.19709099 0.09790827 0.005505759 0.02267340 0.002819348
## 44     44 0.1266086 0.19739575 0.09790638 0.005416161 0.02188588 0.002757002
## 45     45 0.1265564 0.19808013 0.09787427 0.005384404 0.02171558 0.002700684
## 46     46 0.1265033 0.19873246 0.09783729 0.005437616 0.02215195 0.002743259
## 47     47 0.1264629 0.19926641 0.09778888 0.005404469 0.02336698 0.002781684
## 48     48 0.1264402 0.19963053 0.09776667 0.005427534 0.02371584 0.002847824
## 49     49 0.1264640 0.19934378 0.09777062 0.005452431 0.02407712 0.002890590
## 50     50 0.1265739 0.19815080 0.09789846 0.005471787 0.02397588 0.002942941
## 51     51 0.1265812 0.19812751 0.09789149 0.005435745 0.02455158 0.002918752
## 52     52 0.1265963 0.19808387 0.09790489 0.005474396 0.02586658 0.002915236
## 53     53 0.1265387 0.19880186 0.09781448 0.005522736 0.02669581 0.002978187
## 54     54 0.1264480 0.19991283 0.09779467 0.005474579 0.02633216 0.002956309
## 55     55 0.1264945 0.19946218 0.09779117 0.005562932 0.02600976 0.003009604
## 56     56 0.1265335 0.19903781 0.09777038 0.005640060 0.02620808 0.003049090
## 57     57 0.1264780 0.19967890 0.09774572 0.005622726 0.02576035 0.003065567
## 58     58 0.1263953 0.20068770 0.09767922 0.005641043 0.02619692 0.003118879
## 59     59 0.1263521 0.20121414 0.09763178 0.005575344 0.02568152 0.003113462
## 60     60 0.1263945 0.20070174 0.09763835 0.005529801 0.02507800 0.003095519
## 61     61 0.1264187 0.20041309 0.09764587 0.005479315 0.02500401 0.003073573
## 62     62 0.1264141 0.20045683 0.09765546 0.005456674 0.02474086 0.003041280
## 63     63 0.1263678 0.20100727 0.09758721 0.005456709 0.02479016 0.003053931
## 64     64 0.1264259 0.20031310 0.09763725 0.005424525 0.02429027 0.003013452
## 65     65 0.1263915 0.20074182 0.09762058 0.005474633 0.02472960 0.003067736
## 66     66 0.1263666 0.20102402 0.09763115 0.005459813 0.02481906 0.003069059
## 67     67 0.1263352 0.20143800 0.09762473 0.005419271 0.02429941 0.003044765
## 68     68 0.1263604 0.20111577 0.09767420 0.005388026 0.02386378 0.003004040
## 69     69 0.1263256 0.20149491 0.09766525 0.005393875 0.02350204 0.003042984
## 70     70 0.1263191 0.20158462 0.09768275 0.005388450 0.02318230 0.003041991
## 71     71 0.1263319 0.20152204 0.09765607 0.005424528 0.02345475 0.003059108
## 72     72 0.1263507 0.20134278 0.09765753 0.005409843 0.02326292 0.003029063
## 73     73 0.1263127 0.20174794 0.09764753 0.005408784 0.02371303 0.003030095
## 74     74 0.1262823 0.20213249 0.09763507 0.005380452 0.02341940 0.003051790
## 75     75 0.1262955 0.20193889 0.09764842 0.005294301 0.02208380 0.003008360
## 76     76 0.1262576 0.20237163 0.09760968 0.005285759 0.02153622 0.002943634
## 77     77 0.1262600 0.20238827 0.09764030 0.005304007 0.02217754 0.002983743
## 78     78 0.1262513 0.20249432 0.09765677 0.005316849 0.02183534 0.002975682
## 79     79 0.1262544 0.20250739 0.09764341 0.005361154 0.02205173 0.003021316
## 80     80 0.1262282 0.20283190 0.09760474 0.005351637 0.02188648 0.003037540
## 81     81 0.1261947 0.20320912 0.09756195 0.005370144 0.02142310 0.003045244
## 82     82 0.1262093 0.20305084 0.09757602 0.005407741 0.02185420 0.003112448
## 83     83 0.1261835 0.20338392 0.09755670 0.005398219 0.02174522 0.003121264
## 84     84 0.1261907 0.20334297 0.09754375 0.005393609 0.02151949 0.003151462
## 85     85 0.1261944 0.20333682 0.09751809 0.005375952 0.02196709 0.003147737
## 86     86 0.1262177 0.20304697 0.09752040 0.005390500 0.02250195 0.003188625
## 87     87 0.1262038 0.20328214 0.09750174 0.005386450 0.02294063 0.003212902
## 88     88 0.1262078 0.20322895 0.09749956 0.005371534 0.02238164 0.003149563
## 89     89 0.1261955 0.20335164 0.09746301 0.005393621 0.02259959 0.003139729
## 90     90 0.1262031 0.20327162 0.09747814 0.005369308 0.02265144 0.003124153
## 91     91 0.1262115 0.20318239 0.09747607 0.005327149 0.02229746 0.003105752
## 92     92 0.1261785 0.20358110 0.09743758 0.005340240 0.02258254 0.003133814
## 93     93 0.1261453 0.20398661 0.09740363 0.005350952 0.02287706 0.003111678
## 94     94 0.1260985 0.20453063 0.09737577 0.005314660 0.02309334 0.003073066
## 95     95 0.1261075 0.20443558 0.09738597 0.005357368 0.02297656 0.003099529
## 96     96 0.1260961 0.20457613 0.09738752 0.005369947 0.02294345 0.003079345
## 97     97 0.1260800 0.20479082 0.09739708 0.005360230 0.02250364 0.003048303
## 98     98 0.1260107 0.20557862 0.09733183 0.005379360 0.02215388 0.003057055
## 99     99 0.1259765 0.20597782 0.09730535 0.005412239 0.02246005 0.003091609
## 100   100 0.1259431 0.20637870 0.09728196 0.005414094 0.02208661 0.003088996
## 101   101 0.1259543 0.20627339 0.09729632 0.005403693 0.02176376 0.003094779
## 102   102 0.1259519 0.20631170 0.09729305 0.005411918 0.02220200 0.003113944
## 103   103 0.1259435 0.20641424 0.09729648 0.005423045 0.02270903 0.003152905
## 104   104 0.1259293 0.20662362 0.09729779 0.005443799 0.02288207 0.003176505
## 105   105 0.1259052 0.20691319 0.09728686 0.005450328 0.02273456 0.003193748
## 106   106 0.1259107 0.20687083 0.09728522 0.005454628 0.02284862 0.003198774
## 107   107 0.1259421 0.20653396 0.09731947 0.005455741 0.02303401 0.003209778
## 108   108 0.1259642 0.20628464 0.09733366 0.005451393 0.02285053 0.003210432
## 109   109 0.1259634 0.20631324 0.09732236 0.005428053 0.02250955 0.003172108
## 110   110 0.1259679 0.20631675 0.09732433 0.005452312 0.02253926 0.003188762
## 111   111 0.1259710 0.20628299 0.09731483 0.005405769 0.02232346 0.003160585
## 112   112 0.1259680 0.20629315 0.09730748 0.005372698 0.02230929 0.003141726
## 113   113 0.1259777 0.20620584 0.09727584 0.005390408 0.02245424 0.003154222
## 114   114 0.1259837 0.20610423 0.09727649 0.005382296 0.02224748 0.003164201
## 115   115 0.1259927 0.20600764 0.09729254 0.005369456 0.02205479 0.003167334
## 116   116 0.1260079 0.20583256 0.09731536 0.005373486 0.02195621 0.003168683
## 117   117 0.1260436 0.20544271 0.09734919 0.005381737 0.02214890 0.003172554
## 118   118 0.1260755 0.20509228 0.09737181 0.005379707 0.02229585 0.003181631
## 119   119 0.1260912 0.20491679 0.09738780 0.005389848 0.02222420 0.003183516
## 120   120 0.1260932 0.20491415 0.09739337 0.005377898 0.02206991 0.003183617
## 121   121 0.1261103 0.20473485 0.09741187 0.005383653 0.02215728 0.003184888
## 122   122 0.1261143 0.20470180 0.09741450 0.005400070 0.02232558 0.003205105
## 123   123 0.1261168 0.20467904 0.09742284 0.005400710 0.02235790 0.003202254
## 124   124 0.1261518 0.20429543 0.09745291 0.005401608 0.02242096 0.003192159
## 125   125 0.1261578 0.20424997 0.09746621 0.005426437 0.02270965 0.003225716
## 126   126 0.1261498 0.20435044 0.09746041 0.005430491 0.02253847 0.003242284
## 127   127 0.1261696 0.20413922 0.09746916 0.005456752 0.02279187 0.003256842
## 128   128 0.1261829 0.20399744 0.09748504 0.005445996 0.02260785 0.003246411
## 129   129 0.1261897 0.20392713 0.09748183 0.005438888 0.02250361 0.003248616
## 130   130 0.1261895 0.20393660 0.09748744 0.005443065 0.02266185 0.003250012
## 131   131 0.1261799 0.20405202 0.09748314 0.005456300 0.02277028 0.003249011
## 132   132 0.1261836 0.20401152 0.09748293 0.005452311 0.02268762 0.003255315
## 133   133 0.1261815 0.20401891 0.09747862 0.005445579 0.02247635 0.003247413
## 134   134 0.1261903 0.20392632 0.09748109 0.005431535 0.02239767 0.003242716
## 135   135 0.1262091 0.20372009 0.09749865 0.005434270 0.02239300 0.003260230
## 136   136 0.1261984 0.20383965 0.09748288 0.005457746 0.02252193 0.003274988
## 137   137 0.1261996 0.20384482 0.09748329 0.005463124 0.02247554 0.003277424
## 138   138 0.1261933 0.20390311 0.09748450 0.005451886 0.02239290 0.003265183
## 139   139 0.1261872 0.20397997 0.09748649 0.005445918 0.02238028 0.003259647
## 140   140 0.1262145 0.20367801 0.09750743 0.005445322 0.02224547 0.003260391
## 141   141 0.1262132 0.20368643 0.09749754 0.005442087 0.02210782 0.003254281
## 142   142 0.1262146 0.20367262 0.09750135 0.005437216 0.02215196 0.003248653
## 143   143 0.1262096 0.20372876 0.09750517 0.005438812 0.02213515 0.003254118
## 144   144 0.1262141 0.20367935 0.09750982 0.005435561 0.02206687 0.003248656
## 145   145 0.1262060 0.20376658 0.09750987 0.005442310 0.02209094 0.003254436
## 146   146 0.1262051 0.20377735 0.09750987 0.005451347 0.02213734 0.003253233
## 147   147 0.1262041 0.20378497 0.09751183 0.005461005 0.02219693 0.003258773
## 148   148 0.1261995 0.20384308 0.09751107 0.005465376 0.02218786 0.003265796
## 149   149 0.1262044 0.20378893 0.09751134 0.005468523 0.02218228 0.003262595
## 150   150 0.1262027 0.20381211 0.09750763 0.005468089 0.02217296 0.003261503
## 151   151 0.1262090 0.20373619 0.09751575 0.005460952 0.02209842 0.003255790
## 152   152 0.1262037 0.20378890 0.09751525 0.005462154 0.02208358 0.003251597
## 153   153 0.1262071 0.20375273 0.09752227 0.005465137 0.02207422 0.003253257
## 154   154 0.1262073 0.20375241 0.09752350 0.005463187 0.02208191 0.003250070
## 155   155 0.1262034 0.20380067 0.09752149 0.005463287 0.02209506 0.003248892
## 156   156 0.1262023 0.20381833 0.09751753 0.005464455 0.02209686 0.003251020
## 157   157 0.1262058 0.20377730 0.09752006 0.005464224 0.02208234 0.003250305
## 158   158 0.1262066 0.20376829 0.09752177 0.005461336 0.02206925 0.003248812
## 159   159 0.1262076 0.20375611 0.09752366 0.005460250 0.02206564 0.003248059
## 160   160 0.1262103 0.20372445 0.09752663 0.005458698 0.02202558 0.003245804
## 161   161 0.1262111 0.20371422 0.09752690 0.005459543 0.02202359 0.003246306
## 162   162 0.1262102 0.20372134 0.09752614 0.005458645 0.02202366 0.003245864
## 163   163 0.1262109 0.20371360 0.09752644 0.005459111 0.02202303 0.003246147
## 164   164 0.1262107 0.20371699 0.09752633 0.005459230 0.02202556 0.003246293
## [1] "Best Model"
##     nvmax
## 105   105

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                  Estimate         2.5 %        97.5 %
## (Intercept)  5.0014260845  4.998178e+00  5.004674e+00
## PC1         -0.0016578295 -1.939429e-03 -1.376230e-03
## PC2         -0.0035750955 -3.860737e-03 -3.289454e-03
## PC3         -0.0015501926 -1.836532e-03 -1.263853e-03
## PC4         -0.0012180774 -1.509401e-03 -9.267541e-04
## PC5          0.0007735608  4.718221e-04  1.075300e-03
## PC6         -0.0004595017 -7.609634e-04 -1.580401e-04
## PC7         -0.0007431548 -1.051640e-03 -4.346692e-04
## PC8         -0.0002118816 -5.243793e-04  1.006161e-04
## PC9         -0.0001761755 -4.985975e-04  1.462465e-04
## PC11        -0.0021135209 -2.464943e-03 -1.762099e-03
## PC12        -0.0017536810 -2.125771e-03 -1.381591e-03
## PC13         0.0011491789  7.712343e-04  1.527123e-03
## PC14         0.0009467031  5.559444e-04  1.337462e-03
## PC16         0.0013417027  9.419079e-04  1.741498e-03
## PC17        -0.0008029682 -1.227550e-03 -3.783867e-04
## PC18        -0.0014706461 -1.913809e-03 -1.027483e-03
## PC20         0.0017173999  1.229914e-03  2.204886e-03
## PC21         0.0002436889 -2.633654e-04  7.507433e-04
## PC22         0.0005313850 -2.621268e-04  1.324897e-03
## PC23         0.0012901432  3.129468e-04  2.267340e-03
## PC24        -0.0032796404 -4.428912e-03 -2.130369e-03
## PC25         0.0011063330 -1.757167e-04  2.388383e-03
## PC26         0.0014168399  9.744754e-05  2.736232e-03
## PC27         0.0015982836  2.710758e-04  2.925491e-03
## PC28         0.0006027089 -7.458642e-04  1.951282e-03
## PC29         0.0015286200  5.311022e-05  3.004130e-03
## PC31        -0.0009951147 -2.614946e-03  6.247170e-04
## PC32        -0.0027512834 -4.387171e-03 -1.115396e-03
## PC33         0.0012921149 -3.804795e-04  2.964709e-03
## PC34         0.0040388349  2.271057e-03  5.806613e-03
## PC37        -0.0013890918 -3.368627e-03  5.904436e-04
## PC38         0.0011074596 -9.527628e-04  3.167682e-03
## PC44         0.0017152667 -4.707702e-04  3.901304e-03
## PC45        -0.0013005600 -3.489601e-03  8.884812e-04
## PC47        -0.0015957827 -3.833214e-03  6.416481e-04
## PC48         0.0012556465 -1.026536e-03  3.537829e-03
## PC49         0.0011123770 -1.175229e-03  3.399983e-03
## PC57        -0.0016637368 -4.069017e-03  7.415431e-04
## PC59         0.0024513197  7.462271e-05  4.828017e-03
## PC60        -0.0013234917 -3.749661e-03  1.102678e-03
## PC63        -0.0021920542 -4.637903e-03  2.537943e-04
## PC64        -0.0019720970 -4.428624e-03  4.844301e-04
## PC65        -0.0011485676 -3.599165e-03  1.302029e-03
## PC66        -0.0023344523 -4.817857e-03  1.489525e-04
## PC68         0.0026013386  9.894665e-05  5.103731e-03
## PC69         0.0016019835 -9.139311e-04  4.117898e-03
## PC71         0.0028849599  3.688310e-04  5.401089e-03
## PC74        -0.0016045929 -4.171966e-03  9.627805e-04
## PC75        -0.0034056174 -5.987307e-03 -8.239278e-04
## PC77         0.0017444035 -8.409127e-04  4.329720e-03
## PC78         0.0014906992 -1.104279e-03  4.085678e-03
## PC79         0.0024646909 -1.554206e-04  5.084802e-03
## PC80        -0.0012272159 -3.891748e-03  1.437317e-03
## PC81         0.0038008121  1.145317e-03  6.456307e-03
## PC83        -0.0025461363 -5.197721e-03  1.054481e-04
## PC84         0.0034510898  7.845560e-04  6.117624e-03
## PC85         0.0046356082  1.929448e-03  7.341768e-03
## PC86        -0.0021140182 -4.813685e-03  5.856487e-04
## PC87         0.0079917487  5.259955e-03  1.072354e-02
## PC88        -0.0019045359 -4.675823e-03  8.667510e-04
## PC89        -0.0025765316 -5.301423e-03  1.483598e-04
## PC90        -0.0019874258 -4.724691e-03  7.498397e-04
## PC94        -0.0037782715 -6.542931e-03 -1.013612e-03
## PC96        -0.0038403925 -6.644133e-03 -1.036652e-03
## PC97        -0.0018094836 -4.596880e-03  9.779123e-04
## PC99        -0.0021723584 -4.974145e-03  6.294277e-04
## PC102       -0.0024153961 -5.255351e-03  4.245584e-04
## PC103        0.0028025654 -1.507000e-05  5.620201e-03
## PC104       -0.0037283368 -6.556930e-03 -8.997441e-04
## PC105        0.0030271855  1.888463e-04  5.865525e-03
## PC106        0.0036465850  8.213900e-04  6.471780e-03
## PC107        0.0012164069 -1.615229e-03  4.048043e-03
## PC109        0.0021445414 -7.014383e-04  4.990521e-03
## PC111       -0.0033480307 -6.212560e-03 -4.835014e-04
## PC113        0.0016276097 -1.241969e-03  4.497188e-03
## PC114       -0.0028422939 -5.706122e-03  2.153468e-05
## PC115       -0.0057720542 -8.656378e-03 -2.887730e-03
## PC118        0.0023483540 -5.493700e-04  5.246078e-03
## PC119       -0.0028879482 -5.781806e-03  5.910083e-06
## PC122        0.0025859779 -3.270246e-04  5.498980e-03
## PC123       -0.0024398312 -5.358963e-03  4.793004e-04
## PC124        0.0016884637 -1.237151e-03  4.614079e-03
## PC125        0.0020562283 -8.782947e-04  4.990751e-03
## PC127        0.0023169044 -5.861867e-04  5.219995e-03
## PC128       -0.0019080380 -4.835402e-03  1.019326e-03
## PC131       -0.0039830944 -6.925845e-03 -1.040343e-03
## PC133       -0.0014171974 -4.370144e-03  1.535750e-03
## PC134        0.0045785462  1.647384e-03  7.509709e-03
## PC135        0.0030155686  7.197429e-05  5.959163e-03
## PC138        0.0018387452 -1.142617e-03  4.820108e-03
## PC139       -0.0034859165 -6.452569e-03 -5.192640e-04
## PC143        0.0020890014 -9.103155e-04  5.088318e-03
## PC144        0.0025042869 -4.868277e-04  5.495401e-03
## PC145        0.0017490944 -1.253714e-03  4.751903e-03
## PC146        0.0046938976  1.679202e-03  7.708593e-03
## PC148       -0.0020388941 -5.021753e-03  9.439644e-04
## PC151        0.0030104170 -2.335209e-05  6.044186e-03
## PC153        0.0033871778  3.669846e-04  6.407371e-03
## PC154       -0.0036129566 -6.648374e-03 -5.775390e-04
## PC155        0.0032444401  2.058263e-04  6.283054e-03
## PC156        0.0033817945  3.344581e-04  6.429131e-03
## PC159        0.0057040318  2.664389e-03  8.743675e-03
## PC161        0.0015546860 -1.486372e-03  4.595744e-03
## PC162       -0.0051450187 -8.221766e-03 -2.068271e-03
## PC163        0.0025833525 -4.850980e-04  5.651803e-03

Test

# Evaluate the trained forward-selection (leapForward) model on the
# held-out test set, reporting predictions and test MSE/RMSE on both the
# transformed and original scales (per the output printed below).
#
# isTRUE() instead of `== TRUE`: `flag == TRUE` returns NA when the flag
# is NA (making the `if` error) and TRUE for non-logical truthy values;
# isTRUE() is FALSE in all those cases, which is the intended gate.
if (isTRUE(algo.forward.caret)) {
  test.model(model = model.forward, test = data.test,
             method = "leapForward", subopt = NULL,
             formula = formula,
             feature.names = feature.names, label.names = label.names,
             id = id,
             # `transformation = t`: presumably a transform object defined
             # earlier in the document (not base::t) — TODO confirm upstream.
             draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.639   4.961   5.012   5.001   5.049   5.143 
## [1] "leapForward  Test MSE: 0.0138580579096168"
## [1] "leapForward  Test RMSE: 0.117720252758889"
## [1] "leapForward  Test MSE (Org Scale): 82.2350242465757"
## [1] "leapForward  Test RMSE (Org Scale): 9.06835289601015"

Backward Elimination with CV

Train

# Train a backward-elimination (leapBackward) model with cross-validation
# via the project's caret wrapper, storing the fitted model and its id
# for the subsequent test step.
#
# isTRUE() instead of `== TRUE`: robust to an NA or non-logical flag,
# which would otherwise make the `if` condition error or misfire.
if (isTRUE(algo.backward.caret)) {
  set.seed(1)  # fixed seed so the CV fold assignment is reproducible
  returned <- train.caret.glmselect(formula = formula,
                                    data = data.train,
                                    method = "leapBackward",
                                    feature.names = feature.names)
  model.backward <- returned$model
  id <- returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 104 on full training set
## [1] "All models results"
##     nvmax      RMSE   Rsquared        MAE      RMSESD RsquaredSD       MAESD
## 1       1 0.1350468 0.08479660 0.10436184 0.005200049 0.01674113 0.002954202
## 2       2 0.1339628 0.09935862 0.10371363 0.005504447 0.01832821 0.003125743
## 3       3 0.1323635 0.12089088 0.10239954 0.005815779 0.02088009 0.003384538
## 4       4 0.1318132 0.12800310 0.10189097 0.005452716 0.01669327 0.003230049
## 5       5 0.1304285 0.14641271 0.10072021 0.005540135 0.01964057 0.003224118
## 6       6 0.1297260 0.15545312 0.10010572 0.005332458 0.01959561 0.002999176
## 7       7 0.1296458 0.15676998 0.10011934 0.005408872 0.01921091 0.003198788
## 8       8 0.1292860 0.16166820 0.09983246 0.005666917 0.02223766 0.003270292
## 9       9 0.1288372 0.16765598 0.09957128 0.005687598 0.02264859 0.003320170
## 10     10 0.1283193 0.17429662 0.09917594 0.005966020 0.02433558 0.003541938
## 11     11 0.1279362 0.17921957 0.09888127 0.005983356 0.02473557 0.003442295
## 12     12 0.1276569 0.18260370 0.09866195 0.005781548 0.02152010 0.003269757
## 13     13 0.1272739 0.18759117 0.09845644 0.005768872 0.02351046 0.003179434
## 14     14 0.1271615 0.18912503 0.09835234 0.005751381 0.02366383 0.003130328
## 15     15 0.1268642 0.19307028 0.09806442 0.005948943 0.02602667 0.003223252
## 16     16 0.1266694 0.19544374 0.09789322 0.005856941 0.02430375 0.003202948
## 17     17 0.1264706 0.19794585 0.09769118 0.005836174 0.02458856 0.003211562
## 18     18 0.1264844 0.19784441 0.09761652 0.005839495 0.02497885 0.003228483
## 19     19 0.1264275 0.19857322 0.09763926 0.005815373 0.02587255 0.003356488
## 20     20 0.1263052 0.20012438 0.09754751 0.005853704 0.02690377 0.003357624
## 21     21 0.1263884 0.19900583 0.09752871 0.005791377 0.02509730 0.003267652
## 22     22 0.1263051 0.20012161 0.09751512 0.005871821 0.02659607 0.003378924
## 23     23 0.1263833 0.19920885 0.09756776 0.005879425 0.02633925 0.003369958
## 24     24 0.1264223 0.19868676 0.09764942 0.005783628 0.02439144 0.003254422
## 25     25 0.1265097 0.19760003 0.09770993 0.005654214 0.02347045 0.003139757
## 26     26 0.1263927 0.19910993 0.09762567 0.005763593 0.02351943 0.003075068
## 27     27 0.1263814 0.19939537 0.09763183 0.005665547 0.02346896 0.003028282
## 28     28 0.1264759 0.19825074 0.09773256 0.005612793 0.02317772 0.002975545
## 29     29 0.1265454 0.19753824 0.09779892 0.005725031 0.02481153 0.003079489
## 30     30 0.1265390 0.19769106 0.09782618 0.005719897 0.02490035 0.003116986
## 31     31 0.1265541 0.19751539 0.09775613 0.005613531 0.02382063 0.003060166
## 32     32 0.1265618 0.19748093 0.09775847 0.005632815 0.02439437 0.003037837
## 33     33 0.1266044 0.19708571 0.09776977 0.005623306 0.02367647 0.002977068
## 34     34 0.1266824 0.19630060 0.09779546 0.005561872 0.02369771 0.002961893
## 35     35 0.1267466 0.19550167 0.09788808 0.005532787 0.02223223 0.002938956
## 36     36 0.1267913 0.19505671 0.09793542 0.005516065 0.02268444 0.002909473
## 37     37 0.1267420 0.19564293 0.09790417 0.005452299 0.02218800 0.002888763
## 38     38 0.1267693 0.19534713 0.09797083 0.005391634 0.02254449 0.002786412
## 39     39 0.1266941 0.19621807 0.09792208 0.005443768 0.02270683 0.002867403
## 40     40 0.1266032 0.19731311 0.09784475 0.005453620 0.02362891 0.002823375
## 41     41 0.1266487 0.19686721 0.09784641 0.005438635 0.02263536 0.002741763
## 42     42 0.1266494 0.19690176 0.09790198 0.005535586 0.02286920 0.002815589
## 43     43 0.1266414 0.19702178 0.09788982 0.005505565 0.02269165 0.002826998
## 44     44 0.1265910 0.19759202 0.09788469 0.005419004 0.02186087 0.002753382
## 45     45 0.1265590 0.19806497 0.09788721 0.005415209 0.02182409 0.002714861
## 46     46 0.1264753 0.19906952 0.09781345 0.005378691 0.02182379 0.002697631
## 47     47 0.1264665 0.19923416 0.09779211 0.005412107 0.02339531 0.002787665
## 48     48 0.1264306 0.19973851 0.09776269 0.005428465 0.02368400 0.002846922
## 49     49 0.1264553 0.19944771 0.09778577 0.005451932 0.02384344 0.002878485
## 50     50 0.1265133 0.19882190 0.09784700 0.005461735 0.02433038 0.002932090
## 51     51 0.1265147 0.19886789 0.09782985 0.005351818 0.02448614 0.002879602
## 52     52 0.1265157 0.19893644 0.09779332 0.005421718 0.02555480 0.002861577
## 53     53 0.1264570 0.19971512 0.09774071 0.005418478 0.02632611 0.002892773
## 54     54 0.1264024 0.20043931 0.09772371 0.005448019 0.02603882 0.002920868
## 55     55 0.1265126 0.19924728 0.09779898 0.005559385 0.02607842 0.003011734
## 56     56 0.1265417 0.19895376 0.09778923 0.005641219 0.02625676 0.003057185
## 57     57 0.1264784 0.19967584 0.09775607 0.005622519 0.02575725 0.003060311
## 58     58 0.1264265 0.20027907 0.09769431 0.005627343 0.02580792 0.003111743
## 59     59 0.1263829 0.20080840 0.09763272 0.005562019 0.02530009 0.003113028
## 60     60 0.1263945 0.20070174 0.09763835 0.005529801 0.02507800 0.003095519
## 61     61 0.1264140 0.20046460 0.09764594 0.005481315 0.02505148 0.003073537
## 62     62 0.1264087 0.20052054 0.09765076 0.005458976 0.02479817 0.003043535
## 63     63 0.1263678 0.20100727 0.09758721 0.005456709 0.02479016 0.003053931
## 64     64 0.1264259 0.20031310 0.09763725 0.005424525 0.02429027 0.003013452
## 65     65 0.1263915 0.20074182 0.09762058 0.005474633 0.02472960 0.003067736
## 66     66 0.1263666 0.20102402 0.09763115 0.005459813 0.02481906 0.003069059
## 67     67 0.1263352 0.20143800 0.09762473 0.005419271 0.02429941 0.003044765
## 68     68 0.1263599 0.20113250 0.09767271 0.005387661 0.02385602 0.003002265
## 69     69 0.1263252 0.20151057 0.09766418 0.005393598 0.02349664 0.003041828
## 70     70 0.1263002 0.20181473 0.09765565 0.005374837 0.02309418 0.003011970
## 71     71 0.1263319 0.20152204 0.09765607 0.005424528 0.02345475 0.003059108
## 72     72 0.1263507 0.20134278 0.09765753 0.005409843 0.02326292 0.003029063
## 73     73 0.1263127 0.20174794 0.09764753 0.005408784 0.02371303 0.003030095
## 74     74 0.1262823 0.20213249 0.09763507 0.005380452 0.02341940 0.003051790
## 75     75 0.1262725 0.20223029 0.09763208 0.005304047 0.02236670 0.003016053
## 76     76 0.1262371 0.20264131 0.09758679 0.005292581 0.02168862 0.002952700
## 77     77 0.1262565 0.20241892 0.09762932 0.005299260 0.02191771 0.002986881
## 78     78 0.1262637 0.20234448 0.09764313 0.005310396 0.02162498 0.002980389
## 79     79 0.1262544 0.20250739 0.09764341 0.005361154 0.02205173 0.003021316
## 80     80 0.1262282 0.20283190 0.09760474 0.005351637 0.02188648 0.003037540
## 81     81 0.1261947 0.20320912 0.09756195 0.005370144 0.02142310 0.003045244
## 82     82 0.1262093 0.20305084 0.09757602 0.005407741 0.02185420 0.003112448
## 83     83 0.1261835 0.20338392 0.09755670 0.005398219 0.02174522 0.003121264
## 84     84 0.1261943 0.20328274 0.09754422 0.005395969 0.02155500 0.003151575
## 85     85 0.1261801 0.20351312 0.09750636 0.005366685 0.02186587 0.003144886
## 86     86 0.1262133 0.20312423 0.09750534 0.005379930 0.02243058 0.003187798
## 87     87 0.1262237 0.20307418 0.09750392 0.005375135 0.02298658 0.003205483
## 88     88 0.1262038 0.20325928 0.09749188 0.005369043 0.02236716 0.003147748
## 89     89 0.1261844 0.20346605 0.09745517 0.005386843 0.02254646 0.003137870
## 90     90 0.1261934 0.20336665 0.09747709 0.005363438 0.02261102 0.003123915
## 91     91 0.1262115 0.20318239 0.09747607 0.005327149 0.02229746 0.003105752
## 92     92 0.1262047 0.20326525 0.09745141 0.005331226 0.02242038 0.003132270
## 93     93 0.1261225 0.20423298 0.09736894 0.005299431 0.02271374 0.003095875
## 94     94 0.1261084 0.20441607 0.09739394 0.005335749 0.02319458 0.003102660
## 95     95 0.1261075 0.20443558 0.09738597 0.005357368 0.02297656 0.003099529
## 96     96 0.1260763 0.20481307 0.09738863 0.005369982 0.02279456 0.003082313
## 97     97 0.1260529 0.20509771 0.09737907 0.005358122 0.02234591 0.003048558
## 98     98 0.1260253 0.20541944 0.09735268 0.005372822 0.02191204 0.003049515
## 99     99 0.1259889 0.20583918 0.09732135 0.005405219 0.02221314 0.003082910
## 100   100 0.1259431 0.20637870 0.09728196 0.005414094 0.02208661 0.003088996
## 101   101 0.1259506 0.20631410 0.09728754 0.005405848 0.02170002 0.003091968
## 102   102 0.1259470 0.20637569 0.09728811 0.005414784 0.02210487 0.003112447
## 103   103 0.1259149 0.20675939 0.09727566 0.005431964 0.02230887 0.003145610
## 104   104 0.1259051 0.20690915 0.09728373 0.005450166 0.02258272 0.003171473
## 105   105 0.1259052 0.20691319 0.09728686 0.005450328 0.02273456 0.003193748
## 106   106 0.1259107 0.20687083 0.09728522 0.005454628 0.02284862 0.003198774
## 107   107 0.1259421 0.20653396 0.09731947 0.005455741 0.02303401 0.003209778
## 108   108 0.1259642 0.20628464 0.09733366 0.005451393 0.02285053 0.003210432
## 109   109 0.1259634 0.20631324 0.09732236 0.005428053 0.02250955 0.003172108
## 110   110 0.1259679 0.20631675 0.09732433 0.005452312 0.02253926 0.003188762
## 111   111 0.1259710 0.20628299 0.09731483 0.005405769 0.02232346 0.003160585
## 112   112 0.1259855 0.20608547 0.09732461 0.005368711 0.02188531 0.003127433
## 113   113 0.1259943 0.20601284 0.09729940 0.005382886 0.02194380 0.003143501
## 114   114 0.1259949 0.20597609 0.09728958 0.005376087 0.02186849 0.003160552
## 115   115 0.1259927 0.20600764 0.09729254 0.005369456 0.02205479 0.003167334
## 116   116 0.1260079 0.20583256 0.09731536 0.005373486 0.02195621 0.003168683
## 117   117 0.1260436 0.20544271 0.09734919 0.005381737 0.02214890 0.003172554
## 118   118 0.1260861 0.20496671 0.09737930 0.005377389 0.02204373 0.003175491
## 119   119 0.1260895 0.20493554 0.09738733 0.005390235 0.02226188 0.003183898
## 120   120 0.1260932 0.20491415 0.09739337 0.005377898 0.02206991 0.003183617
## 121   121 0.1261028 0.20481386 0.09741028 0.005383794 0.02211571 0.003185459
## 122   122 0.1261127 0.20471560 0.09741576 0.005400097 0.02231831 0.003204656
## 123   123 0.1261227 0.20461441 0.09742624 0.005400570 0.02239069 0.003201025
## 124   124 0.1261518 0.20429543 0.09745291 0.005401608 0.02242096 0.003192159
## 125   125 0.1261578 0.20424997 0.09746621 0.005426437 0.02270965 0.003225716
## 126   126 0.1261498 0.20435044 0.09746041 0.005430491 0.02253847 0.003242284
## 127   127 0.1261719 0.20411142 0.09747511 0.005455823 0.02276624 0.003253842
## 128   128 0.1261756 0.20408420 0.09747046 0.005439971 0.02253104 0.003241995
## 129   129 0.1261882 0.20394601 0.09748106 0.005432678 0.02246081 0.003239384
## 130   130 0.1261895 0.20393660 0.09748744 0.005443065 0.02266185 0.003250012
## 131   131 0.1261799 0.20405202 0.09748314 0.005456300 0.02277028 0.003249011
## 132   132 0.1261836 0.20401152 0.09748293 0.005452311 0.02268762 0.003255315
## 133   133 0.1261815 0.20401891 0.09747862 0.005445579 0.02247635 0.003247413
## 134   134 0.1261903 0.20392632 0.09748109 0.005431535 0.02239767 0.003242716
## 135   135 0.1262091 0.20372009 0.09749865 0.005434270 0.02239300 0.003260230
## 136   136 0.1261984 0.20383965 0.09748288 0.005457746 0.02252193 0.003274988
## 137   137 0.1261996 0.20384482 0.09748329 0.005463124 0.02247554 0.003277424
## 138   138 0.1261933 0.20390311 0.09748450 0.005451886 0.02239290 0.003265183
## 139   139 0.1261869 0.20397962 0.09748228 0.005445921 0.02238047 0.003261087
## 140   140 0.1262163 0.20365760 0.09750413 0.005445294 0.02225625 0.003261575
## 141   141 0.1262184 0.20362681 0.09749855 0.005442033 0.02214024 0.003253918
## 142   142 0.1262113 0.20370244 0.09750014 0.005437250 0.02213571 0.003249088
## 143   143 0.1262096 0.20372876 0.09750517 0.005438812 0.02213515 0.003254118
## 144   144 0.1262141 0.20367935 0.09750982 0.005435561 0.02206687 0.003248656
## 145   145 0.1262060 0.20376658 0.09750987 0.005442310 0.02209094 0.003254436
## 146   146 0.1262051 0.20377735 0.09750987 0.005451347 0.02213734 0.003253233
## 147   147 0.1262041 0.20378497 0.09751183 0.005461005 0.02219693 0.003258773
## 148   148 0.1261995 0.20384308 0.09751107 0.005465376 0.02218786 0.003265796
## 149   149 0.1262044 0.20378893 0.09751134 0.005468523 0.02218228 0.003262595
## 150   150 0.1262027 0.20381211 0.09750763 0.005468089 0.02217296 0.003261503
## 151   151 0.1262090 0.20373619 0.09751575 0.005460952 0.02209842 0.003255790
## 152   152 0.1262037 0.20378890 0.09751525 0.005462154 0.02208358 0.003251597
## 153   153 0.1262071 0.20375273 0.09752227 0.005465137 0.02207422 0.003253257
## 154   154 0.1262073 0.20375241 0.09752350 0.005463187 0.02208191 0.003250070
## 155   155 0.1262034 0.20380067 0.09752149 0.005463287 0.02209506 0.003248892
## 156   156 0.1262023 0.20381833 0.09751753 0.005464455 0.02209686 0.003251020
## 157   157 0.1262058 0.20377730 0.09752006 0.005464224 0.02208234 0.003250305
## 158   158 0.1262066 0.20376829 0.09752177 0.005461336 0.02206925 0.003248812
## 159   159 0.1262076 0.20375611 0.09752366 0.005460250 0.02206564 0.003248059
## 160   160 0.1262103 0.20372445 0.09752663 0.005458698 0.02202558 0.003245804
## 161   161 0.1262111 0.20371422 0.09752690 0.005459543 0.02202359 0.003246306
## 162   162 0.1262102 0.20372134 0.09752614 0.005458645 0.02202366 0.003245864
## 163   163 0.1262109 0.20371360 0.09752644 0.005459111 0.02202303 0.003246147
## 164   164 0.1262107 0.20371699 0.09752633 0.005459230 0.02202556 0.003246293
## [1] "Best Model"
##     nvmax
## 104   104

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                  Estimate         2.5 %        97.5 %
## (Intercept)  5.0014222113  4.998174e+00  5.004670e+00
## PC1         -0.0016577612 -1.939353e-03 -1.376169e-03
## PC2         -0.0035749327 -3.860566e-03 -3.289299e-03
## PC3         -0.0015492645 -1.835588e-03 -1.262941e-03
## PC4         -0.0012192821 -1.510584e-03 -9.279801e-04
## PC5          0.0007734032  4.716727e-04  1.075134e-03
## PC6         -0.0004590054 -7.604569e-04 -1.575540e-04
## PC7         -0.0007433374 -1.051814e-03 -4.348603e-04
## PC8         -0.0002119523 -5.244417e-04  1.005370e-04
## PC9         -0.0001764765 -4.988891e-04  1.459362e-04
## PC11        -0.0021129296 -2.464339e-03 -1.761520e-03
## PC12        -0.0017534984 -2.125579e-03 -1.381418e-03
## PC13         0.0011482032  7.702755e-04  1.526131e-03
## PC14         0.0009457513  5.550094e-04  1.336493e-03
## PC16         0.0013424822  9.427021e-04  1.742262e-03
## PC17        -0.0008021447 -1.226711e-03 -3.775788e-04
## PC18        -0.0014706365 -1.913788e-03 -1.027485e-03
## PC20         0.0017180699  1.230600e-03  2.205540e-03
## PC21         0.0002442162 -2.628232e-04  7.512556e-04
## PC22         0.0005330761 -2.604048e-04  1.326557e-03
## PC23         0.0012883717  3.112099e-04  2.265533e-03
## PC24        -0.0032749970 -4.424187e-03 -2.125807e-03
## PC25         0.0011059619 -1.760535e-04  2.387977e-03
## PC26         0.0014136753  9.433858e-05  2.733012e-03
## PC27         0.0015993679  2.721977e-04  2.926538e-03
## PC28         0.0005992156 -7.492971e-04  1.947728e-03
## PC29         0.0015253507  4.989974e-05  3.000802e-03
## PC31        -0.0009890918 -2.608820e-03  6.306362e-04
## PC32        -0.0027516041 -4.387448e-03 -1.115760e-03
## PC33         0.0012886128 -3.839173e-04  2.961143e-03
## PC34         0.0040405432  2.272817e-03  5.808269e-03
## PC37        -0.0013894136 -3.368896e-03  5.900690e-04
## PC38         0.0011026820 -9.574556e-04  3.162820e-03
## PC44         0.0017203442 -4.656027e-04  3.906291e-03
## PC45        -0.0013110319 -3.499879e-03  8.778154e-04
## PC47        -0.0015943566 -3.831725e-03  6.430123e-04
## PC48         0.0012636314 -1.018414e-03  3.545677e-03
## PC49         0.0011182781 -1.169225e-03  3.405782e-03
## PC57        -0.0016675666 -4.072766e-03  7.376329e-04
## PC59         0.0024626269  8.613876e-05  4.839115e-03
## PC60        -0.0013218780 -3.747980e-03  1.104224e-03
## PC63        -0.0021916039 -4.637387e-03  2.541793e-04
## PC64        -0.0019642192 -4.420613e-03  4.921741e-04
## PC65        -0.0011496140 -3.600145e-03  1.300917e-03
## PC66        -0.0023281552 -4.811451e-03  1.551403e-04
## PC68         0.0026001944  9.787031e-05  5.102518e-03
## PC69         0.0015913446 -9.243813e-04  4.107070e-03
## PC71         0.0028929184  3.769245e-04  5.408912e-03
## PC74        -0.0016118474 -4.179097e-03  9.554023e-04
## PC75        -0.0034053759 -5.986997e-03 -8.237550e-04
## PC77         0.0017379226 -8.472808e-04  4.323126e-03
## PC78         0.0014921819 -1.102725e-03  4.087089e-03
## PC79         0.0024692020 -1.508188e-04  5.089223e-03
## PC80        -0.0012210840 -3.885507e-03  1.443339e-03
## PC81         0.0038126686  1.157388e-03  6.467949e-03
## PC83        -0.0025480822 -5.199592e-03  1.034278e-04
## PC84         0.0034582002  7.917886e-04  6.124612e-03
## PC85         0.0046426930  1.936655e-03  7.348731e-03
## PC86        -0.0021190342 -4.818604e-03  5.805357e-04
## PC87         0.0079850241  5.253348e-03  1.071670e-02
## PC88        -0.0019026781 -4.673888e-03  8.685318e-04
## PC89        -0.0025753556 -5.300173e-03  1.494620e-04
## PC90        -0.0019867324 -4.723925e-03  7.504599e-04
## PC94        -0.0037767007 -6.541284e-03 -1.012117e-03
## PC96        -0.0038427244 -6.646385e-03 -1.039064e-03
## PC97        -0.0018198439 -4.607061e-03  9.673736e-04
## PC99        -0.0021647930 -4.966449e-03  6.368633e-04
## PC102       -0.0024111155 -5.250977e-03  4.287461e-04
## PC103        0.0028049658 -1.258909e-05  5.622521e-03
## PC104       -0.0037227926 -6.551281e-03 -8.943046e-04
## PC105        0.0030198383  1.816261e-04  5.858050e-03
## PC106        0.0036544913  8.294313e-04  6.479551e-03
## PC109        0.0021413254 -7.045688e-04  4.987220e-03
## PC111       -0.0033544759 -6.218890e-03 -4.900620e-04
## PC113        0.0016187830 -1.250646e-03  4.488212e-03
## PC114       -0.0028390509 -5.702793e-03  2.469165e-05
## PC115       -0.0057876949 -8.671712e-03 -2.903677e-03
## PC118        0.0023517131 -5.459234e-04  5.249350e-03
## PC119       -0.0028830942 -5.776853e-03  1.066510e-05
## PC122        0.0025881068 -3.248141e-04  5.501028e-03
## PC123       -0.0024474000 -5.366401e-03  4.716008e-04
## PC124        0.0016762785 -1.249121e-03  4.601678e-03
## PC125        0.0020406172 -8.936028e-04  4.974837e-03
## PC127        0.0023154697 -5.875423e-04  5.218482e-03
## PC128       -0.0019045648 -4.831840e-03  1.022711e-03
## PC131       -0.0039823516 -6.925024e-03 -1.039679e-03
## PC133       -0.0014211645 -4.374019e-03  1.531690e-03
## PC134        0.0045632507  1.632382e-03  7.494119e-03
## PC135        0.0030116658  6.816370e-05  5.955168e-03
## PC138        0.0018376645 -1.143618e-03  4.818947e-03
## PC139       -0.0034823391 -6.448901e-03 -5.157771e-04
## PC143        0.0020985169 -9.006384e-04  5.097672e-03
## PC144        0.0025130617 -4.779036e-04  5.504027e-03
## PC145        0.0017584098 -1.244241e-03  4.761060e-03
## PC146        0.0046934971  1.678882e-03  7.708112e-03
## PC148       -0.0020385676 -5.021347e-03  9.442115e-04
## PC151        0.0030021294 -3.149765e-05  6.035757e-03
## PC153        0.0033895689  3.694610e-04  6.409677e-03
## PC154       -0.0036163082 -6.651635e-03 -5.809812e-04
## PC155        0.0032510431  2.125490e-04  6.289537e-03
## PC156        0.0033882955  3.410777e-04  6.435513e-03
## PC159        0.0057078538  2.668305e-03  8.747403e-03
## PC161        0.0015612292 -1.479710e-03  4.602168e-03
## PC162       -0.0051649052 -8.241223e-03 -2.088588e-03
## PC163        0.0025996682 -4.684657e-04  5.667802e-03

Test

# Evaluate the trained backward-selection model on the held-out test set.
# isTRUE() is safer than `== TRUE`: it yields FALSE (not NA) if the flag is
# ever NA/NULL, so the `if` cannot error.
# NOTE(review): `transformation = t` passes whatever object `t` is bound to;
# if no project-level transformation function `t` was defined upstream, this
# silently picks up base R's matrix-transpose `t()` — confirm `t` is the
# intended back-transform (e.g. inverse of the cube-root label transform).
if (isTRUE(algo.backward.caret)) {
  test.model(model.backward, data.test
             ,method = 'leapBackward', subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.643   4.961   5.012   5.001   5.049   5.143 
## [1] "leapBackward  Test MSE: 0.0138487130654014"
## [1] "leapBackward  Test RMSE: 0.117680555171198"
## [1] "leapBackward  Test MSE (Org Scale): 82.1848665615533"
## [1] "leapBackward  Test RMSE (Org Scale): 9.06558693971622"

Stepwise Selection with CV

Train

# Train a stepwise (sequential replacement) subset-selection model with
# cross-validation via the project's caret wrapper.
# isTRUE() is safer than `== TRUE`: FALSE (not NA) on a missing/NA flag.
if (isTRUE(algo.stepwise.caret)) {
  set.seed(1)  # fixed seed so CV fold assignment is reproducible
  returned <- train.caret.glmselect(formula = formula
                                    ,data = data.train
                                    ,method = "leapSeq"
                                    ,feature.names = feature.names)
  model.stepwise <- returned$model  # fitted caret model, used by test.model() below
  id <- returned$id                 # run identifier forwarded to test.model()
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 105 on full training set
## [1] "All models results"
##     nvmax      RMSE   Rsquared        MAE      RMSESD RsquaredSD       MAESD
## 1       1 0.1350468 0.08479660 0.10436184 0.005200049 0.01674113 0.002954202
## 2       2 0.1339628 0.09935862 0.10371363 0.005504447 0.01832821 0.003125743
## 3       3 0.1325279 0.11859544 0.10261175 0.005695304 0.02051365 0.003329322
## 4       4 0.1322772 0.12208289 0.10206422 0.005339563 0.02078229 0.003118272
## 5       5 0.1305639 0.14426308 0.10100557 0.005042205 0.01403455 0.002731516
## 6       6 0.1316835 0.12976030 0.10173813 0.005316761 0.02257861 0.003023353
## 7       7 0.1296458 0.15676998 0.10011934 0.005408872 0.01921091 0.003198788
## 8       8 0.1292860 0.16166820 0.09983246 0.005666917 0.02223766 0.003270292
## 9       9 0.1288372 0.16765598 0.09957128 0.005687598 0.02264859 0.003320170
## 10     10 0.1283193 0.17429662 0.09917594 0.005966020 0.02433558 0.003541938
## 11     11 0.1279362 0.17921957 0.09888127 0.005983356 0.02473557 0.003442295
## 12     12 0.1281969 0.17548221 0.09905509 0.005351854 0.01623643 0.002913054
## 13     13 0.1274936 0.18459777 0.09861919 0.005801784 0.02498022 0.003233391
## 14     14 0.1271615 0.18912503 0.09835234 0.005751381 0.02366383 0.003130328
## 15     15 0.1275661 0.18360676 0.09858862 0.005819577 0.02996811 0.003329440
## 16     16 0.1266694 0.19544374 0.09789322 0.005856941 0.02430375 0.003202948
## 17     17 0.1264706 0.19794585 0.09769118 0.005836174 0.02458856 0.003211562
## 18     18 0.1264844 0.19784441 0.09761652 0.005839495 0.02497885 0.003228483
## 19     19 0.1264275 0.19857322 0.09763926 0.005815373 0.02587255 0.003356488
## 20     20 0.1263052 0.20012438 0.09754751 0.005853704 0.02690377 0.003357624
## 21     21 0.1263884 0.19900583 0.09752871 0.005791377 0.02509730 0.003267652
## 22     22 0.1263051 0.20012161 0.09751512 0.005871821 0.02659607 0.003378924
## 23     23 0.1267050 0.19499902 0.09786119 0.005624592 0.02720583 0.003299839
## 24     24 0.1264146 0.19869760 0.09778952 0.005779477 0.02438728 0.003300492
## 25     25 0.1265097 0.19760003 0.09770993 0.005654214 0.02347045 0.003139757
## 26     26 0.1263927 0.19910993 0.09762567 0.005763593 0.02351943 0.003075068
## 27     27 0.1263814 0.19939537 0.09763183 0.005665547 0.02346896 0.003028282
## 28     28 0.1267634 0.19454072 0.09798707 0.005689976 0.02146966 0.002919610
## 29     29 0.1265454 0.19753824 0.09779892 0.005725031 0.02481153 0.003079489
## 30     30 0.1265390 0.19769106 0.09782618 0.005719897 0.02490035 0.003116986
## 31     31 0.1265541 0.19751539 0.09775613 0.005613531 0.02382063 0.003060166
## 32     32 0.1265618 0.19748093 0.09775847 0.005632815 0.02439437 0.003037837
## 33     33 0.1266044 0.19708571 0.09776977 0.005623306 0.02367647 0.002977068
## 34     34 0.1266189 0.19706632 0.09778340 0.005575182 0.02371033 0.002961167
## 35     35 0.1267466 0.19550167 0.09788808 0.005532787 0.02223223 0.002938956
## 36     36 0.1267695 0.19528058 0.09793055 0.005501084 0.02251962 0.002907074
## 37     37 0.1267420 0.19564293 0.09790417 0.005452299 0.02218800 0.002888763
## 38     38 0.1266638 0.19647283 0.09790816 0.005330265 0.02205737 0.002762618
## 39     39 0.1266917 0.19610127 0.09784108 0.005447161 0.02289125 0.002866402
## 40     40 0.1266227 0.19715789 0.09789099 0.005471754 0.02366100 0.002851723
## 41     41 0.1266699 0.19660533 0.09790882 0.005442517 0.02268889 0.002774760
## 42     42 0.1266494 0.19690176 0.09790198 0.005535586 0.02286920 0.002815589
## 43     43 0.1266819 0.19639631 0.09786647 0.005533537 0.02298357 0.002805611
## 44     44 0.1266156 0.19713052 0.09784906 0.005436428 0.02209167 0.002719508
## 45     45 0.1264311 0.19935758 0.09788548 0.005371560 0.02153190 0.002703640
## 46     46 0.1265030 0.19873032 0.09783505 0.005437657 0.02215237 0.002742781
## 47     47 0.1264665 0.19923416 0.09779211 0.005412107 0.02339531 0.002787665
## 48     48 0.1265336 0.19846547 0.09786012 0.005628169 0.02502365 0.003028937
## 49     49 0.1264494 0.19952905 0.09777268 0.005453521 0.02401244 0.002891190
## 50     50 0.1264070 0.19990081 0.09789331 0.005463733 0.02389122 0.002946627
## 51     51 0.1265382 0.19858572 0.09784181 0.005406872 0.02427270 0.002905345
## 52     52 0.1265645 0.19840160 0.09790344 0.005510323 0.02585609 0.002960722
## 53     53 0.1267176 0.19649334 0.09801672 0.005422541 0.02077710 0.002775333
## 54     54 0.1263742 0.20066805 0.09771197 0.005452019 0.02599711 0.002918154
## 55     55 0.1266051 0.19763710 0.09790074 0.005789355 0.02821974 0.003196533
## 56     56 0.1265556 0.19841681 0.09775886 0.005709270 0.02677930 0.003060421
## 57     57 0.1265227 0.19898943 0.09782542 0.005672396 0.02662914 0.003099303
## 58     58 0.1264144 0.19997077 0.09775297 0.005629872 0.02573608 0.003170875
## 59     59 0.1264182 0.20001696 0.09775194 0.005797404 0.02671245 0.003289860
## 60     60 0.1264646 0.19988603 0.09761342 0.005503591 0.02442307 0.003108297
## 61     61 0.1265138 0.19900750 0.09765880 0.005556988 0.02601605 0.003089252
## 62     62 0.1264087 0.20052054 0.09765076 0.005458976 0.02479817 0.003043535
## 63     63 0.1263366 0.20129863 0.09754349 0.005451967 0.02435507 0.003076678
## 64     64 0.1264259 0.20031310 0.09763725 0.005424525 0.02429027 0.003013452
## 65     65 0.1263915 0.20074182 0.09762058 0.005474633 0.02472960 0.003067736
## 66     66 0.1265739 0.19831297 0.09787518 0.005459195 0.01984935 0.002945129
## 67     67 0.1263352 0.20143800 0.09762473 0.005419271 0.02429941 0.003044765
## 68     68 0.1263604 0.20111577 0.09767420 0.005388026 0.02386378 0.003004040
## 69     69 0.1263256 0.20149491 0.09766525 0.005393875 0.02350204 0.003042984
## 70     70 0.1263002 0.20181473 0.09765565 0.005374837 0.02309418 0.003011970
## 71     71 0.1264214 0.20012020 0.09769444 0.005496297 0.02441083 0.003104155
## 72     72 0.1263507 0.20134278 0.09765753 0.005409843 0.02326292 0.003029063
## 73     73 0.1263248 0.20132052 0.09774986 0.005412939 0.02402634 0.003091156
## 74     74 0.1262823 0.20213249 0.09763507 0.005380452 0.02341940 0.003051790
## 75     75 0.1263350 0.20120931 0.09777582 0.005307162 0.02260807 0.003082305
## 76     76 0.1263365 0.20127569 0.09769852 0.005358018 0.02270595 0.002991455
## 77     77 0.1262740 0.20231825 0.09755218 0.005297567 0.02210017 0.003043057
## 78     78 0.1262020 0.20299214 0.09749060 0.005452566 0.02256629 0.003352238
## 79     79 0.1263419 0.20138368 0.09768639 0.005361393 0.02297008 0.003068239
## 80     80 0.1262581 0.20242680 0.09759276 0.005346043 0.02206966 0.003104953
## 81     81 0.1261947 0.20320912 0.09756195 0.005370144 0.02142310 0.003045244
## 82     82 0.1262659 0.20207048 0.09762445 0.005378114 0.02354856 0.003131447
## 83     83 0.1261996 0.20313799 0.09741336 0.005420777 0.02150601 0.003382701
## 84     84 0.1263164 0.20174346 0.09762979 0.005661491 0.02340628 0.003298319
## 85     85 0.1261801 0.20351312 0.09750636 0.005366685 0.02186587 0.003144886
## 86     86 0.1261970 0.20298678 0.09749973 0.005511911 0.02358881 0.003436087
## 87     87 0.1262338 0.20292624 0.09745802 0.005705640 0.02566215 0.003468153
## 88     88 0.1262483 0.20249283 0.09757617 0.005370057 0.02277543 0.003130502
## 89     89 0.1261955 0.20335164 0.09746301 0.005393621 0.02259959 0.003139729
## 90     90 0.1261934 0.20336665 0.09747709 0.005363438 0.02261102 0.003123915
## 91     91 0.1264311 0.20059007 0.09762620 0.005652167 0.02467497 0.003284455
## 92     92 0.1263402 0.20160197 0.09756722 0.005326981 0.01920001 0.003055152
## 93     93 0.1261621 0.20377905 0.09742235 0.005346787 0.02257374 0.003090141
## 94     94 0.1261084 0.20441607 0.09739394 0.005335749 0.02319458 0.003102660
## 95     95 0.1264174 0.20066596 0.09761633 0.005612220 0.02117898 0.003247363
## 96     96 0.1263164 0.20160242 0.09747864 0.005393009 0.01988180 0.003042558
## 97     97 0.1260800 0.20479082 0.09739708 0.005360230 0.02250364 0.003048303
## 98     98 0.1260321 0.20510429 0.09734445 0.005369648 0.02292541 0.003062055
## 99     99 0.1260488 0.20496114 0.09736048 0.005422419 0.02276596 0.003102894
## 100   100 0.1259479 0.20612362 0.09728252 0.005417393 0.02255106 0.003113881
## 101   101 0.1260789 0.20478302 0.09743580 0.005672517 0.02360294 0.003332230
## 102   102 0.1259519 0.20631170 0.09729305 0.005411918 0.02220200 0.003113944
## 103   103 0.1259241 0.20663886 0.09728295 0.005434298 0.02236920 0.003148814
## 104   104 0.1259247 0.20625351 0.09737047 0.005440436 0.02334962 0.003172490
## 105   105 0.1259052 0.20691319 0.09728686 0.005450328 0.02273456 0.003193748
## 106   106 0.1259107 0.20687083 0.09728522 0.005454628 0.02284862 0.003198774
## 107   107 0.1260101 0.20568038 0.09744929 0.005601612 0.02401082 0.003431009
## 108   108 0.1259642 0.20628464 0.09733366 0.005451393 0.02285053 0.003210432
## 109   109 0.1259234 0.20664633 0.09727907 0.005519219 0.02299484 0.003275447
## 110   110 0.1259679 0.20631675 0.09732433 0.005452312 0.02253926 0.003188762
## 111   111 0.1260582 0.20535046 0.09740466 0.005464953 0.02291757 0.003200165
## 112   112 0.1259680 0.20629315 0.09730748 0.005372698 0.02230929 0.003141726
## 113   113 0.1259640 0.20616394 0.09732440 0.005390251 0.02248070 0.003143256
## 114   114 0.1260355 0.20535300 0.09733967 0.005354253 0.02291612 0.003179052
## 115   115 0.1261254 0.20409338 0.09731557 0.005366230 0.01961667 0.003083218
## 116   116 0.1260079 0.20583256 0.09731536 0.005373486 0.02195621 0.003168683
## 117   117 0.1260436 0.20544271 0.09734919 0.005381737 0.02214890 0.003172554
## 118   118 0.1260755 0.20509228 0.09737181 0.005379707 0.02229585 0.003181631
## 119   119 0.1260895 0.20493554 0.09738733 0.005390235 0.02226188 0.003183898
## 120   120 0.1263104 0.20220053 0.09758767 0.005364904 0.01936782 0.003103567
## 121   121 0.1260962 0.20472831 0.09736875 0.005415717 0.02214818 0.003287637
## 122   122 0.1261143 0.20470180 0.09741450 0.005400070 0.02232558 0.003205105
## 123   123 0.1261227 0.20461441 0.09742624 0.005400570 0.02239069 0.003201025
## 124   124 0.1261518 0.20429543 0.09745291 0.005401608 0.02242096 0.003192159
## 125   125 0.1261578 0.20424997 0.09746621 0.005426437 0.02270965 0.003225716
## 126   126 0.1262953 0.20257515 0.09754172 0.005414468 0.01938043 0.003185837
## 127   127 0.1261696 0.20413922 0.09746916 0.005456752 0.02279187 0.003256842
## 128   128 0.1261852 0.20396915 0.09749103 0.005445025 0.02258067 0.003243312
## 129   129 0.1262147 0.20356858 0.09753423 0.005488625 0.02285205 0.003325360
## 130   130 0.1263447 0.20176081 0.09759023 0.005456417 0.02307641 0.003211006
## 131   131 0.1263736 0.20171757 0.09760836 0.005442613 0.01887451 0.003171437
## 132   132 0.1261836 0.20401152 0.09748293 0.005452311 0.02268762 0.003255315
## 133   133 0.1261815 0.20401891 0.09747862 0.005445579 0.02247635 0.003247413
## 134   134 0.1261679 0.20413308 0.09754349 0.005494370 0.02291241 0.003280477
## 135   135 0.1262091 0.20372009 0.09749865 0.005434270 0.02239300 0.003260230
## 136   136 0.1262480 0.20322034 0.09750876 0.005439350 0.02202279 0.003263059
## 137   137 0.1261996 0.20384482 0.09748329 0.005463124 0.02247554 0.003277424
## 138   138 0.1260828 0.20500834 0.09743437 0.005464973 0.02236168 0.003256900
## 139   139 0.1261849 0.20371541 0.09752111 0.005545615 0.02305477 0.003364675
## 140   140 0.1262163 0.20365760 0.09750413 0.005445294 0.02225625 0.003261575
## 141   141 0.1262184 0.20362681 0.09749855 0.005442033 0.02214024 0.003253918
## 142   142 0.1262113 0.20370244 0.09750014 0.005437250 0.02213571 0.003249088
## 143   143 0.1263053 0.20262855 0.09763133 0.005445772 0.02297824 0.003233102
## 144   144 0.1262141 0.20367935 0.09750982 0.005435561 0.02206687 0.003248656
## 145   145 0.1262001 0.20374020 0.09749636 0.005455758 0.02205134 0.003286755
## 146   146 0.1261814 0.20394764 0.09749285 0.005444576 0.02203457 0.003244521
## 147   147 0.1262041 0.20378497 0.09751183 0.005461005 0.02219693 0.003258773
## 148   148 0.1261995 0.20384308 0.09751107 0.005465376 0.02218786 0.003265796
## 149   149 0.1262357 0.20335516 0.09755440 0.005450382 0.02284001 0.003274319
## 150   150 0.1262360 0.20343283 0.09755822 0.005537977 0.02257121 0.003341582
## 151   151 0.1261815 0.20399123 0.09746637 0.005466393 0.02208943 0.003255334
## 152   152 0.1262037 0.20378890 0.09751525 0.005462154 0.02208358 0.003251597
## 153   153 0.1262071 0.20375273 0.09752227 0.005465137 0.02207422 0.003253257
## 154   154 0.1263044 0.20258269 0.09759268 0.005451627 0.01997249 0.003208215
## 155   155 0.1262226 0.20347248 0.09756220 0.005477332 0.02224766 0.003305405
## 156   156 0.1262266 0.20347780 0.09755820 0.005409539 0.02160416 0.003155794
## 157   157 0.1262352 0.20332987 0.09754736 0.005486054 0.02230274 0.003287724
## 158   158 0.1262066 0.20376829 0.09752177 0.005461336 0.02206925 0.003248812
## 159   159 0.1262521 0.20331827 0.09758682 0.005460783 0.02232412 0.003230028
## 160   160 0.1262673 0.20308715 0.09761758 0.005446711 0.02135365 0.003222636
## 161   161 0.1261725 0.20408172 0.09750785 0.005462554 0.02197681 0.003258521
## 162   162 0.1262127 0.20363342 0.09752853 0.005431786 0.02181612 0.003215356
## 163   163 0.1262045 0.20378760 0.09751702 0.005461890 0.02212859 0.003251503
## 164   164 0.1262107 0.20371699 0.09752633 0.005459230 0.02202556 0.003246293
## [1] "Best Model"
##     nvmax
## 105   105

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                  Estimate         2.5 %        97.5 %
## (Intercept)  5.0014260845  4.998178e+00  5.004674e+00
## PC1         -0.0016578295 -1.939429e-03 -1.376230e-03
## PC2         -0.0035750955 -3.860737e-03 -3.289454e-03
## PC3         -0.0015501926 -1.836532e-03 -1.263853e-03
## PC4         -0.0012180774 -1.509401e-03 -9.267541e-04
## PC5          0.0007735608  4.718221e-04  1.075300e-03
## PC6         -0.0004595017 -7.609634e-04 -1.580401e-04
## PC7         -0.0007431548 -1.051640e-03 -4.346692e-04
## PC8         -0.0002118816 -5.243793e-04  1.006161e-04
## PC9         -0.0001761755 -4.985975e-04  1.462465e-04
## PC11        -0.0021135209 -2.464943e-03 -1.762099e-03
## PC12        -0.0017536810 -2.125771e-03 -1.381591e-03
## PC13         0.0011491789  7.712343e-04  1.527123e-03
## PC14         0.0009467031  5.559444e-04  1.337462e-03
## PC16         0.0013417027  9.419079e-04  1.741498e-03
## PC17        -0.0008029682 -1.227550e-03 -3.783867e-04
## PC18        -0.0014706461 -1.913809e-03 -1.027483e-03
## PC20         0.0017173999  1.229914e-03  2.204886e-03
## PC21         0.0002436889 -2.633654e-04  7.507433e-04
## PC22         0.0005313850 -2.621268e-04  1.324897e-03
## PC23         0.0012901432  3.129468e-04  2.267340e-03
## PC24        -0.0032796404 -4.428912e-03 -2.130369e-03
## PC25         0.0011063330 -1.757167e-04  2.388383e-03
## PC26         0.0014168399  9.744754e-05  2.736232e-03
## PC27         0.0015982836  2.710758e-04  2.925491e-03
## PC28         0.0006027089 -7.458642e-04  1.951282e-03
## PC29         0.0015286200  5.311022e-05  3.004130e-03
## PC31        -0.0009951147 -2.614946e-03  6.247170e-04
## PC32        -0.0027512834 -4.387171e-03 -1.115396e-03
## PC33         0.0012921149 -3.804795e-04  2.964709e-03
## PC34         0.0040388349  2.271057e-03  5.806613e-03
## PC37        -0.0013890918 -3.368627e-03  5.904436e-04
## PC38         0.0011074596 -9.527628e-04  3.167682e-03
## PC44         0.0017152667 -4.707702e-04  3.901304e-03
## PC45        -0.0013005600 -3.489601e-03  8.884812e-04
## PC47        -0.0015957827 -3.833214e-03  6.416481e-04
## PC48         0.0012556465 -1.026536e-03  3.537829e-03
## PC49         0.0011123770 -1.175229e-03  3.399983e-03
## PC57        -0.0016637368 -4.069017e-03  7.415431e-04
## PC59         0.0024513197  7.462271e-05  4.828017e-03
## PC60        -0.0013234917 -3.749661e-03  1.102678e-03
## PC63        -0.0021920542 -4.637903e-03  2.537943e-04
## PC64        -0.0019720970 -4.428624e-03  4.844301e-04
## PC65        -0.0011485676 -3.599165e-03  1.302029e-03
## PC66        -0.0023344523 -4.817857e-03  1.489525e-04
## PC68         0.0026013386  9.894665e-05  5.103731e-03
## PC69         0.0016019835 -9.139311e-04  4.117898e-03
## PC71         0.0028849599  3.688310e-04  5.401089e-03
## PC74        -0.0016045929 -4.171966e-03  9.627805e-04
## PC75        -0.0034056174 -5.987307e-03 -8.239278e-04
## PC77         0.0017444035 -8.409127e-04  4.329720e-03
## PC78         0.0014906992 -1.104279e-03  4.085678e-03
## PC79         0.0024646909 -1.554206e-04  5.084802e-03
## PC80        -0.0012272159 -3.891748e-03  1.437317e-03
## PC81         0.0038008121  1.145317e-03  6.456307e-03
## PC83        -0.0025461363 -5.197721e-03  1.054481e-04
## PC84         0.0034510898  7.845560e-04  6.117624e-03
## PC85         0.0046356082  1.929448e-03  7.341768e-03
## PC86        -0.0021140182 -4.813685e-03  5.856487e-04
## PC87         0.0079917487  5.259955e-03  1.072354e-02
## PC88        -0.0019045359 -4.675823e-03  8.667510e-04
## PC89        -0.0025765316 -5.301423e-03  1.483598e-04
## PC90        -0.0019874258 -4.724691e-03  7.498397e-04
## PC94        -0.0037782715 -6.542931e-03 -1.013612e-03
## PC96        -0.0038403925 -6.644133e-03 -1.036652e-03
## PC97        -0.0018094836 -4.596880e-03  9.779123e-04
## PC99        -0.0021723584 -4.974145e-03  6.294277e-04
## PC102       -0.0024153961 -5.255351e-03  4.245584e-04
## PC103        0.0028025654 -1.507000e-05  5.620201e-03
## PC104       -0.0037283368 -6.556930e-03 -8.997441e-04
## PC105        0.0030271855  1.888463e-04  5.865525e-03
## PC106        0.0036465850  8.213900e-04  6.471780e-03
## PC107        0.0012164069 -1.615229e-03  4.048043e-03
## PC109        0.0021445414 -7.014383e-04  4.990521e-03
## PC111       -0.0033480307 -6.212560e-03 -4.835014e-04
## PC113        0.0016276097 -1.241969e-03  4.497188e-03
## PC114       -0.0028422939 -5.706122e-03  2.153468e-05
## PC115       -0.0057720542 -8.656378e-03 -2.887730e-03
## PC118        0.0023483540 -5.493700e-04  5.246078e-03
## PC119       -0.0028879482 -5.781806e-03  5.910083e-06
## PC122        0.0025859779 -3.270246e-04  5.498980e-03
## PC123       -0.0024398312 -5.358963e-03  4.793004e-04
## PC124        0.0016884637 -1.237151e-03  4.614079e-03
## PC125        0.0020562283 -8.782947e-04  4.990751e-03
## PC127        0.0023169044 -5.861867e-04  5.219995e-03
## PC128       -0.0019080380 -4.835402e-03  1.019326e-03
## PC131       -0.0039830944 -6.925845e-03 -1.040343e-03
## PC133       -0.0014171974 -4.370144e-03  1.535750e-03
## PC134        0.0045785462  1.647384e-03  7.509709e-03
## PC135        0.0030155686  7.197429e-05  5.959163e-03
## PC138        0.0018387452 -1.142617e-03  4.820108e-03
## PC139       -0.0034859165 -6.452569e-03 -5.192640e-04
## PC143        0.0020890014 -9.103155e-04  5.088318e-03
## PC144        0.0025042869 -4.868277e-04  5.495401e-03
## PC145        0.0017490944 -1.253714e-03  4.751903e-03
## PC146        0.0046938976  1.679202e-03  7.708593e-03
## PC148       -0.0020388941 -5.021753e-03  9.439644e-04
## PC151        0.0030104170 -2.335209e-05  6.044186e-03
## PC153        0.0033871778  3.669846e-04  6.407371e-03
## PC154       -0.0036129566 -6.648374e-03 -5.775390e-04
## PC155        0.0032444401  2.058263e-04  6.283054e-03
## PC156        0.0033817945  3.344581e-04  6.429131e-03
## PC159        0.0057040318  2.664389e-03  8.743675e-03
## PC161        0.0015546860 -1.486372e-03  4.595744e-03
## PC162       -0.0051450187 -8.221766e-03 -2.068271e-03
## PC163        0.0025833525 -4.850980e-04  5.651803e-03

Test

# Evaluate the trained stepwise-selection model on the held-out test set.
# isTRUE() is safer than `== TRUE`: it yields FALSE (not NA) if the flag is
# ever NA/NULL, so the `if` cannot error.
# NOTE(review): `transformation = t` passes whatever object `t` is bound to;
# if no project-level transformation function `t` was defined upstream, this
# silently picks up base R's matrix-transpose `t()` — confirm `t` is the
# intended back-transform for the "Org Scale" metrics reported below.
if (isTRUE(algo.stepwise.caret)) {
  test.model(model.stepwise, data.test
             ,method = 'leapSeq', subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.639   4.961   5.012   5.001   5.049   5.143 
## [1] "leapSeq  Test MSE: 0.013858057909617"
## [1] "leapSeq  Test RMSE: 0.11772025275889"
## [1] "leapSeq  Test MSE (Org Scale): 82.2350242465771"
## [1] "leapSeq  Test RMSE (Org Scale): 9.06835289601023"

LASSO with CV

Train

# Train a LASSO model with cross-validation via the project's caret wrapper.
# isTRUE() is safer than `== TRUE`: FALSE (not NA) on a missing/NA flag.
if (isTRUE(algo.LASSO.caret)) {
  set.seed(1)  # fixed seed so CV fold assignment is reproducible
  # Pure LASSO: alpha held at 1; tune lambda on a log-spaced grid 1e-4..1e-2
  # (100 points), matching the grid shown in the resampling output below.
  tune.grid <- expand.grid(alpha = 1, lambda = 10^seq(from = -4, to = -2, length = 100))
  returned <- train.caret.glmselect(formula = formula
                                    ,data = data.train
                                    ,method = "glmnet"
                                    ,subopt = 'LASSO'
                                    ,tune.grid = tune.grid
                                    ,feature.names = feature.names)
  model.LASSO.caret <- returned$model  # fitted caret model for the test step
  id <- returned$id  # capture run id, consistent with the other train chunks
}
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 1, lambda = 0.00102 on full training set
## glmnet 
## 
## 5584 samples
##  164 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ... 
## Resampling results across tuning parameters:
## 
##   lambda        RMSE       Rsquared   MAE       
##   0.0001000000  0.1260825  0.2046718  0.09742207
##   0.0001047616  0.1260767  0.2047156  0.09741752
##   0.0001097499  0.1260708  0.2047609  0.09741279
##   0.0001149757  0.1260645  0.2048081  0.09740790
##   0.0001204504  0.1260582  0.2048560  0.09740292
##   0.0001261857  0.1260516  0.2049055  0.09739775
##   0.0001321941  0.1260448  0.2049569  0.09739235
##   0.0001384886  0.1260377  0.2050105  0.09738675
##   0.0001450829  0.1260303  0.2050659  0.09738098
##   0.0001519911  0.1260227  0.2051234  0.09737515
##   0.0001592283  0.1260148  0.2051830  0.09736920
##   0.0001668101  0.1260066  0.2052448  0.09736302
##   0.0001747528  0.1259980  0.2053104  0.09735658
##   0.0001830738  0.1259891  0.2053793  0.09734977
##   0.0001917910  0.1259797  0.2054517  0.09734272
##   0.0002009233  0.1259700  0.2055270  0.09733545
##   0.0002104904  0.1259600  0.2056047  0.09732812
##   0.0002205131  0.1259497  0.2056852  0.09732055
##   0.0002310130  0.1259390  0.2057689  0.09731291
##   0.0002420128  0.1259280  0.2058557  0.09730499
##   0.0002535364  0.1259168  0.2059431  0.09729703
##   0.0002656088  0.1259053  0.2060334  0.09728883
##   0.0002782559  0.1258934  0.2061281  0.09728025
##   0.0002915053  0.1258810  0.2062268  0.09727120
##   0.0003053856  0.1258682  0.2063303  0.09726176
##   0.0003199267  0.1258550  0.2064371  0.09725204
##   0.0003351603  0.1258415  0.2065464  0.09724210
##   0.0003511192  0.1258278  0.2066587  0.09723194
##   0.0003678380  0.1258139  0.2067727  0.09722197
##   0.0003853529  0.1257997  0.2068898  0.09721178
##   0.0004037017  0.1257851  0.2070103  0.09720176
##   0.0004229243  0.1257704  0.2071331  0.09719162
##   0.0004430621  0.1257553  0.2072603  0.09718129
##   0.0004641589  0.1257399  0.2073924  0.09717069
##   0.0004862602  0.1257243  0.2075271  0.09715995
##   0.0005094138  0.1257084  0.2076660  0.09714871
##   0.0005336699  0.1256923  0.2078087  0.09713750
##   0.0005590810  0.1256764  0.2079523  0.09712627
##   0.0005857021  0.1256604  0.2080982  0.09711571
##   0.0006135907  0.1256446  0.2082450  0.09710548
##   0.0006428073  0.1256296  0.2083851  0.09709652
##   0.0006734151  0.1256150  0.2085234  0.09708817
##   0.0007054802  0.1256009  0.2086592  0.09708034
##   0.0007390722  0.1255874  0.2087930  0.09707266
##   0.0007742637  0.1255748  0.2089201  0.09706635
##   0.0008111308  0.1255631  0.2090421  0.09706078
##   0.0008497534  0.1255529  0.2091508  0.09705710
##   0.0008902151  0.1255442  0.2092480  0.09705475
##   0.0009326033  0.1255385  0.2093165  0.09705483
##   0.0009770100  0.1255348  0.2093673  0.09705697
##   0.0010235310  0.1255337  0.2093957  0.09706146
##   0.0010722672  0.1255346  0.2094072  0.09706701
##   0.0011233240  0.1255399  0.2093726  0.09707475
##   0.0011768120  0.1255490  0.2093014  0.09708430
##   0.0012328467  0.1255613  0.2092006  0.09709614
##   0.0012915497  0.1255774  0.2090634  0.09711099
##   0.0013530478  0.1255955  0.2089150  0.09712797
##   0.0014174742  0.1256171  0.2087344  0.09714789
##   0.0014849683  0.1256417  0.2085341  0.09717050
##   0.0015556761  0.1256707  0.2082931  0.09719777
##   0.0016297508  0.1257016  0.2080455  0.09722851
##   0.0017073526  0.1257367  0.2077616  0.09726185
##   0.0017886495  0.1257707  0.2075144  0.09729153
##   0.0018738174  0.1258088  0.2072329  0.09732378
##   0.0019630407  0.1258486  0.2069499  0.09735462
##   0.0020565123  0.1258942  0.2066113  0.09738900
##   0.0021544347  0.1259445  0.2062318  0.09742628
##   0.0022570197  0.1260016  0.2057861  0.09746946
##   0.0023644894  0.1260615  0.2053221  0.09751627
##   0.0024770764  0.1261283  0.2047877  0.09756748
##   0.0025950242  0.1261999  0.2042033  0.09762418
##   0.0027185882  0.1262791  0.2035363  0.09768572
##   0.0028480359  0.1263649  0.2027990  0.09775115
##   0.0029836472  0.1264593  0.2019681  0.09782125
##   0.0031257158  0.1265548  0.2011568  0.09789386
##   0.0032745492  0.1266571  0.2002761  0.09797469
##   0.0034304693  0.1267602  0.1994136  0.09805780
##   0.0035938137  0.1268722  0.1984503  0.09814836
##   0.0037649358  0.1269875  0.1974720  0.09824250
##   0.0039442061  0.1271124  0.1963887  0.09834316
##   0.0041320124  0.1272433  0.1952556  0.09844567
##   0.0043287613  0.1273861  0.1939813  0.09855466
##   0.0045348785  0.1275330  0.1926951  0.09866591
##   0.0047508102  0.1276909  0.1912852  0.09878621
##   0.0049770236  0.1278541  0.1898300  0.09890973
##   0.0052140083  0.1280312  0.1881955  0.09904293
##   0.0054622772  0.1282237  0.1863484  0.09919031
##   0.0057223677  0.1284315  0.1842822  0.09935079
##   0.0059948425  0.1286470  0.1821334  0.09951832
##   0.0062802914  0.1288776  0.1797575  0.09969603
##   0.0065793322  0.1291204  0.1772135  0.09988561
##   0.0068926121  0.1293819  0.1743770  0.10008914
##   0.0072208090  0.1296552  0.1713683  0.10029652
##   0.0075646333  0.1299499  0.1679861  0.10051799
##   0.0079248290  0.1302446  0.1646172  0.10073492
##   0.0083021757  0.1305541  0.1609747  0.10096336
##   0.0086974900  0.1308615  0.1573733  0.10119179
##   0.0091116276  0.1311845  0.1534685  0.10143495
##   0.0095454846  0.1314959  0.1497763  0.10167206
##   0.0100000000  0.1318179  0.1458583  0.10191631
## 
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 0.001023531.

##    alpha      lambda
## 51     1 0.001023531
##     alpha       lambda      RMSE  Rsquared        MAE      RMSESD RsquaredSD       MAESD
## 1       1 0.0001000000 0.1260825 0.2046718 0.09742207 0.005468723 0.02236901 0.003236068
## 2       1 0.0001047616 0.1260767 0.2047156 0.09741752 0.005469059 0.02238403 0.003235452
## 3       1 0.0001097499 0.1260708 0.2047609 0.09741279 0.005469400 0.02239956 0.003234806
## 4       1 0.0001149757 0.1260645 0.2048081 0.09740790 0.005469750 0.02241571 0.003234135
## 5       1 0.0001204504 0.1260582 0.2048560 0.09740292 0.005470043 0.02243135 0.003233384
## 6       1 0.0001261857 0.1260516 0.2049055 0.09739775 0.005470325 0.02244778 0.003232585
## 7       1 0.0001321941 0.1260448 0.2049569 0.09739235 0.005470549 0.02246373 0.003231665
## 8       1 0.0001384886 0.1260377 0.2050105 0.09738675 0.005470760 0.02247970 0.003230658
## 9       1 0.0001450829 0.1260303 0.2050659 0.09738098 0.005470969 0.02249613 0.003229564
## 10      1 0.0001519911 0.1260227 0.2051234 0.09737515 0.005471189 0.02251318 0.003228480
## 11      1 0.0001592283 0.1260148 0.2051830 0.09736920 0.005471406 0.02253119 0.003227360
## 12      1 0.0001668101 0.1260066 0.2052448 0.09736302 0.005471630 0.02255043 0.003226204
## 13      1 0.0001747528 0.1259980 0.2053104 0.09735658 0.005471890 0.02257141 0.003225041
## 14      1 0.0001830738 0.1259891 0.2053793 0.09734977 0.005472167 0.02259350 0.003223823
## 15      1 0.0001917910 0.1259797 0.2054517 0.09734272 0.005472486 0.02261693 0.003222536
## 16      1 0.0002009233 0.1259700 0.2055270 0.09733545 0.005472835 0.02264126 0.003221237
## 17      1 0.0002104904 0.1259600 0.2056047 0.09732812 0.005473159 0.02266722 0.003220013
## 18      1 0.0002205131 0.1259497 0.2056852 0.09732055 0.005473483 0.02269446 0.003218792
## 19      1 0.0002310130 0.1259390 0.2057689 0.09731291 0.005473837 0.02272310 0.003217232
## 20      1 0.0002420128 0.1259280 0.2058557 0.09730499 0.005474230 0.02275304 0.003215534
## 21      1 0.0002535364 0.1259168 0.2059431 0.09729703 0.005474525 0.02278199 0.003213447
## 22      1 0.0002656088 0.1259053 0.2060334 0.09728883 0.005474803 0.02281248 0.003211119
## 23      1 0.0002782559 0.1258934 0.2061281 0.09728025 0.005474981 0.02284316 0.003208638
## 24      1 0.0002915053 0.1258810 0.2062268 0.09727120 0.005475135 0.02287516 0.003206036
## 25      1 0.0003053856 0.1258682 0.2063303 0.09726176 0.005475155 0.02290682 0.003203253
## 26      1 0.0003199267 0.1258550 0.2064371 0.09725204 0.005475209 0.02293929 0.003200312
## 27      1 0.0003351603 0.1258415 0.2065464 0.09724210 0.005475598 0.02297395 0.003197246
## 28      1 0.0003511192 0.1258278 0.2066587 0.09723194 0.005476096 0.02300981 0.003194303
## 29      1 0.0003678380 0.1258139 0.2067727 0.09722197 0.005476928 0.02304752 0.003191193
## 30      1 0.0003853529 0.1257997 0.2068898 0.09721178 0.005477806 0.02308594 0.003187963
## 31      1 0.0004037017 0.1257851 0.2070103 0.09720176 0.005478945 0.02312617 0.003184618
## 32      1 0.0004229243 0.1257704 0.2071331 0.09719162 0.005480205 0.02316812 0.003181095
## 33      1 0.0004430621 0.1257553 0.2072603 0.09718129 0.005481915 0.02321066 0.003177424
## 34      1 0.0004641589 0.1257399 0.2073924 0.09717069 0.005483676 0.02325166 0.003173594
## 35      1 0.0004862602 0.1257243 0.2075271 0.09715995 0.005485119 0.02328592 0.003169007
## 36      1 0.0005094138 0.1257084 0.2076660 0.09714871 0.005486564 0.02331956 0.003164249
## 37      1 0.0005336699 0.1256923 0.2078087 0.09713750 0.005487599 0.02335295 0.003159006
## 38      1 0.0005590810 0.1256764 0.2079523 0.09712627 0.005488686 0.02339021 0.003153440
## 39      1 0.0005857021 0.1256604 0.2080982 0.09711571 0.005489928 0.02343134 0.003146378
## 40      1 0.0006135907 0.1256446 0.2082450 0.09710548 0.005491300 0.02347601 0.003139060
## 41      1 0.0006428073 0.1256296 0.2083851 0.09709652 0.005493340 0.02352508 0.003131897
## 42      1 0.0006734151 0.1256150 0.2085234 0.09708817 0.005495410 0.02357576 0.003123739
## 43      1 0.0007054802 0.1256009 0.2086592 0.09708034 0.005496740 0.02361655 0.003114351
## 44      1 0.0007390722 0.1255874 0.2087930 0.09707266 0.005498020 0.02365332 0.003104944
## 45      1 0.0007742637 0.1255748 0.2089201 0.09706635 0.005500343 0.02369586 0.003096681
## 46      1 0.0008111308 0.1255631 0.2090421 0.09706078 0.005502852 0.02373917 0.003088597
## 47      1 0.0008497534 0.1255529 0.2091508 0.09705710 0.005505630 0.02378900 0.003081498
## 48      1 0.0008902151 0.1255442 0.2092480 0.09705475 0.005508493 0.02384071 0.003073773
## 49      1 0.0009326033 0.1255385 0.2093165 0.09705483 0.005511910 0.02388925 0.003066434
## 50      1 0.0009770100 0.1255348 0.2093673 0.09705697 0.005515382 0.02394078 0.003060203
## 51      1 0.0010235310 0.1255337 0.2093957 0.09706146 0.005518812 0.02399187 0.003053976
## 52      1 0.0010722672 0.1255346 0.2094072 0.09706701 0.005522158 0.02404079 0.003047697
## 53      1 0.0011233240 0.1255399 0.2093726 0.09707475 0.005525456 0.02407587 0.003041392
## 54      1 0.0011768120 0.1255490 0.2093014 0.09708430 0.005529051 0.02411405 0.003035638
## 55      1 0.0012328467 0.1255613 0.2092006 0.09709614 0.005534578 0.02417738 0.003033481
## 56      1 0.0012915497 0.1255774 0.2090634 0.09711099 0.005540552 0.02424070 0.003031494
## 57      1 0.0013530478 0.1255955 0.2089150 0.09712797 0.005546234 0.02430813 0.003029750
## 58      1 0.0014174742 0.1256171 0.2087344 0.09714789 0.005551959 0.02437253 0.003029249
## 59      1 0.0014849683 0.1256417 0.2085341 0.09717050 0.005560154 0.02443063 0.003031508
## 60      1 0.0015556761 0.1256707 0.2082931 0.09719777 0.005569661 0.02448043 0.003033078
## 61      1 0.0016297508 0.1257016 0.2080455 0.09722851 0.005580272 0.02452286 0.003032948
## 62      1 0.0017073526 0.1257367 0.2077616 0.09726185 0.005591886 0.02456525 0.003033655
## 63      1 0.0017886495 0.1257707 0.2075144 0.09729153 0.005603542 0.02466919 0.003034926
## 64      1 0.0018738174 0.1258088 0.2072329 0.09732378 0.005615403 0.02477067 0.003036598
## 65      1 0.0019630407 0.1258486 0.2069499 0.09735462 0.005626519 0.02490640 0.003038752
## 66      1 0.0020565123 0.1258942 0.2066113 0.09738900 0.005637251 0.02502849 0.003039624
## 67      1 0.0021544347 0.1259445 0.2062318 0.09742628 0.005648284 0.02512498 0.003040915
## 68      1 0.0022570197 0.1260016 0.2057861 0.09746946 0.005660299 0.02521097 0.003041832
## 69      1 0.0023644894 0.1260615 0.2053221 0.09751627 0.005669783 0.02524201 0.003043284
## 70      1 0.0024770764 0.1261283 0.2047877 0.09756748 0.005678586 0.02523420 0.003044600
## 71      1 0.0025950242 0.1261999 0.2042033 0.09762418 0.005682640 0.02512431 0.003044469
## 72      1 0.0027185882 0.1262791 0.2035363 0.09768572 0.005686554 0.02498208 0.003046624
## 73      1 0.0028480359 0.1263649 0.2027990 0.09775115 0.005691829 0.02481281 0.003051511
## 74      1 0.0029836472 0.1264593 0.2019681 0.09782125 0.005697492 0.02462696 0.003054682
## 75      1 0.0031257158 0.1265548 0.2011568 0.09789386 0.005703922 0.02454317 0.003059049
## 76      1 0.0032745492 0.1266571 0.2002761 0.09797469 0.005709961 0.02444007 0.003060940
## 77      1 0.0034304693 0.1267602 0.1994136 0.09805780 0.005714504 0.02440549 0.003065936
## 78      1 0.0035938137 0.1268722 0.1984503 0.09814836 0.005718712 0.02436154 0.003069996
## 79      1 0.0037649358 0.1269875 0.1974720 0.09824250 0.005716502 0.02430181 0.003067797
## 80      1 0.0039442061 0.1271124 0.1963887 0.09834316 0.005712351 0.02419828 0.003061138
## 81      1 0.0041320124 0.1272433 0.1952556 0.09844567 0.005705121 0.02405859 0.003051228
## 82      1 0.0043287613 0.1273861 0.1939813 0.09855466 0.005698734 0.02387929 0.003043569
## 83      1 0.0045348785 0.1275330 0.1926951 0.09866591 0.005692840 0.02375917 0.003038266
## 84      1 0.0047508102 0.1276909 0.1912852 0.09878621 0.005687934 0.02362865 0.003034210
## 85      1 0.0049770236 0.1278541 0.1898300 0.09890973 0.005679072 0.02348158 0.003028450
## 86      1 0.0052140083 0.1280312 0.1881955 0.09904293 0.005669511 0.02329129 0.003022277
## 87      1 0.0054622772 0.1282237 0.1863484 0.09919031 0.005655903 0.02298977 0.003013228
## 88      1 0.0057223677 0.1284315 0.1842822 0.09935079 0.005640721 0.02262829 0.003003959
## 89      1 0.0059948425 0.1286470 0.1821334 0.09951832 0.005625468 0.02233746 0.002997386
## 90      1 0.0062802914 0.1288776 0.1797575 0.09969603 0.005610533 0.02196863 0.002992295
## 91      1 0.0065793322 0.1291204 0.1772135 0.09988561 0.005599032 0.02165368 0.002992005
## 92      1 0.0068926121 0.1293819 0.1743770 0.10008914 0.005588324 0.02132520 0.002990617
## 93      1 0.0072208090 0.1296552 0.1713683 0.10029652 0.005573148 0.02105525 0.002986774
## 94      1 0.0075646333 0.1299499 0.1679861 0.10051799 0.005556051 0.02075031 0.002980449
## 95      1 0.0079248290 0.1302446 0.1646172 0.10073492 0.005537806 0.02051751 0.002976055
## 96      1 0.0083021757 0.1305541 0.1609747 0.10096336 0.005521288 0.02023450 0.002974015
## 97      1 0.0086974900 0.1308615 0.1573733 0.10119179 0.005500597 0.01994873 0.002967727
## 98      1 0.0091116276 0.1311845 0.1534685 0.10143495 0.005479771 0.01954826 0.002952125
## 99      1 0.0095454846 0.1314959 0.1497763 0.10167206 0.005456415 0.01915786 0.002930001
## 100     1 0.0100000000 0.1318179 0.1458583 0.10191631 0.005433767 0.01862530 0.002906214

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients"
##                model.coef
## (Intercept)  5.001487e+00
## PC1         -1.567812e-03
## PC2         -3.493388e-03
## PC3         -1.446555e-03
## PC4         -1.129211e-03
## PC5          6.832443e-04
## PC6         -3.535998e-04
## PC7         -6.479233e-04
## PC8         -9.431297e-05
## PC9         -7.560733e-05
## PC11        -1.986916e-03
## PC12        -1.646537e-03
## PC13         1.031739e-03
## PC14         8.286380e-04
## PC16         1.218077e-03
## PC17        -6.625690e-04
## PC18        -1.331233e-03
## PC20         1.562215e-03
## PC21         9.138747e-05
## PC22         2.633970e-04
## PC23         9.951633e-04
## PC24        -2.923723e-03
## PC25         6.556743e-04
## PC26         1.043674e-03
## PC27         1.190914e-03
## PC28         1.988490e-04
## PC29         1.081735e-03
## PC31        -4.993303e-04
## PC32        -2.243598e-03
## PC33         7.243084e-04
## PC34         3.488786e-03
## PC37        -7.877621e-04
## PC38         5.062938e-04
## PC44         9.732651e-04
## PC45        -5.988759e-04
## PC47        -8.665171e-04
## PC48         5.176597e-04
## PC49         3.980649e-04
## PC51         5.233114e-05
## PC57        -9.055802e-04
## PC58         1.027779e-04
## PC59         1.674679e-03
## PC60        -4.742266e-04
## PC63        -1.446109e-03
## PC64        -1.152212e-03
## PC65        -3.452139e-04
## PC66        -1.486194e-03
## PC68         1.875676e-03
## PC69         6.968924e-04
## PC71         2.099835e-03
## PC73         9.164869e-05
## PC74        -8.155097e-04
## PC75        -2.531314e-03
## PC77         9.025942e-04
## PC78         6.248711e-04
## PC79         1.606224e-03
## PC80        -3.147085e-04
## PC81         2.972511e-03
## PC83        -1.781444e-03
## PC84         2.622673e-03
## PC85         3.721049e-03
## PC86        -1.259845e-03
## PC87         7.057308e-03
## PC88        -9.870596e-04
## PC89        -1.646929e-03
## PC90        -1.071178e-03
## PC94        -2.892807e-03
## PC96        -2.865481e-03
## PC97        -8.956921e-04
## PC98        -1.460496e-04
## PC99        -1.300748e-03
## PC102       -1.445495e-03
## PC103        1.866757e-03
## PC104       -2.802670e-03
## PC105        2.141194e-03
## PC106        2.722359e-03
## PC107        3.387021e-04
## PC109        1.185912e-03
## PC111       -2.291964e-03
## PC113        6.320066e-04
## PC114       -1.879096e-03
## PC115       -4.902552e-03
## PC118        1.455565e-03
## PC119       -1.946489e-03
## PC120        1.913643e-04
## PC122        1.619960e-03
## PC123       -1.432900e-03
## PC124        7.632978e-04
## PC125        1.113433e-03
## PC127        1.413671e-03
## PC128       -1.053886e-03
## PC131       -3.044589e-03
## PC132        1.754724e-04
## PC133       -5.653046e-04
## PC134        3.634871e-03
## PC135        2.042061e-03
## PC136        3.101532e-04
## PC137       -3.647538e-06
## PC138        9.023092e-04
## PC139       -2.505786e-03
## PC140       -3.107734e-04
## PC143        9.772412e-04
## PC144        1.547501e-03
## PC145        9.053174e-04
## PC146        3.694168e-03
## PC147       -5.613169e-05
## PC148       -1.095390e-03
## PC151        2.012338e-03
## PC153        2.277421e-03
## PC154       -2.595253e-03
## PC155        2.353435e-03
## PC156        2.482936e-03
## PC159        4.829031e-03
## PC161        5.273463e-04
## PC162       -4.226100e-03
## PC163        1.597400e-03

Test

# Evaluate the caret-trained LASSO (glmnet) model on the held-out test set.
# Runs only when LASSO was enabled in the report parameters.
if (algo.LASSO.caret == TRUE){
  test.model(
    model.LASSO.caret, data.test,
    formula = formula,
    method = 'glmnet', subopt = "LASSO",
    feature.names = feature.names,
    label.names = label.names,
    draw.limits = TRUE,
    # NOTE(review): `transformation = t` — presumably a transformation object
    # defined earlier in the report; if it is not, this resolves to base::t.
    # Confirm against the setup chunks.
    transformation = t
  )
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.698   4.966   5.012   5.001   5.046   5.125 
## [1] "glmnet LASSO Test MSE: 0.0137364202892418"
## [1] "glmnet LASSO Test RMSE: 0.117202475610551"
## [1] "glmnet LASSO Test MSE (Org Scale): 81.5488598346235"
## [1] "glmnet LASSO Test RMSE (Org Scale): 9.03044073313277"

LARS with CV

Train

# Fit a Least Angle Regression (LARS) model via caret, tuning the `fraction`
# hyper-parameter with cross-validation. Runs only when enabled in params.
if (algo.LARS.caret == TRUE){
  # Fixed seed so the CV fold assignment (and thus the tuning result)
  # is reproducible across report renders.
  set.seed(1)
  returned <- train.caret.glmselect(
    formula = formula,
    data = data.train,
    method = "lars",
    # NOTE(review): subopt is the *string* 'NULL' here, whereas the LARS
    # test chunk passes the NULL object — confirm train.caret.glmselect
    # treats both the same way.
    subopt = 'NULL',
    feature.names = feature.names
  )
  # Keep only the fitted caret model object for the test chunk below.
  model.LARS.caret <- returned$model
}
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, : There were missing values in resampled
## performance measures.
## Aggregating results
## Selecting tuning parameters
## Fitting fraction = 0.727 on full training set
## Least Angle Regression 
## 
## 5584 samples
##  164 predictor
## 
## Pre-processing: centered (164), scaled (164) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ... 
## Resampling results across tuning parameters:
## 
##   fraction    RMSE       Rsquared    MAE       
##   0.00000000  0.1410824         NaN  0.10856265
##   0.01010101  0.1396190  0.08479660  0.10753014
##   0.02020202  0.1383496  0.08479660  0.10662638
##   0.03030303  0.1372793  0.08479660  0.10586782
##   0.04040404  0.1364220  0.08507658  0.10525349
##   0.05050505  0.1357349  0.09462889  0.10476920
##   0.06060606  0.1351147  0.10371154  0.10433708
##   0.07070707  0.1345335  0.11191090  0.10392114
##   0.08080808  0.1339773  0.11952394  0.10351470
##   0.09090909  0.1334533  0.12640891  0.10312749
##   0.10101010  0.1329667  0.13235060  0.10276612
##   0.11111111  0.1325370  0.13736321  0.10244916
##   0.12121212  0.1321326  0.14211319  0.10214952
##   0.13131313  0.1317426  0.14684531  0.10185521
##   0.14141414  0.1313730  0.15130900  0.10157582
##   0.15151515  0.1310213  0.15546194  0.10130976
##   0.16161616  0.1306822  0.15951206  0.10105631
##   0.17171717  0.1303581  0.16330128  0.10081820
##   0.18181818  0.1300486  0.16687014  0.10058924
##   0.19191919  0.1297470  0.17031546  0.10036436
##   0.20202020  0.1294549  0.17359608  0.10014450
##   0.21212121  0.1291762  0.17664077  0.09992976
##   0.22222222  0.1289202  0.17933752  0.09972863
##   0.23232323  0.1286815  0.18178665  0.09954440
##   0.24242424  0.1284530  0.18409640  0.09936805
##   0.25252525  0.1282385  0.18622319  0.09920272
##   0.26262626  0.1280456  0.18808192  0.09905471
##   0.27272727  0.1278731  0.18966457  0.09892508
##   0.28282828  0.1277165  0.19104937  0.09880806
##   0.29292929  0.1275687  0.19236514  0.09869474
##   0.30303030  0.1274335  0.19355952  0.09859085
##   0.31313131  0.1273096  0.19464270  0.09849490
##   0.32323232  0.1271940  0.19564758  0.09840508
##   0.33333333  0.1270858  0.19660302  0.09831958
##   0.34343434  0.1269869  0.19745068  0.09823938
##   0.35353535  0.1268943  0.19823921  0.09816448
##   0.36363636  0.1268057  0.19900288  0.09809086
##   0.37373737  0.1267219  0.19972742  0.09802236
##   0.38383838  0.1266426  0.20040861  0.09795984
##   0.39393939  0.1265682  0.20103959  0.09790044
##   0.40404040  0.1264955  0.20167453  0.09784403
##   0.41414141  0.1264258  0.20229332  0.09779194
##   0.42424242  0.1263631  0.20283250  0.09774520
##   0.43434343  0.1263038  0.20333804  0.09770114
##   0.44444444  0.1262482  0.20380446  0.09765806
##   0.45454545  0.1261959  0.20424267  0.09761648
##   0.46464646  0.1261474  0.20463777  0.09757743
##   0.47474747  0.1261016  0.20500433  0.09754201
##   0.48484848  0.1260573  0.20536102  0.09750848
##   0.49494949  0.1260167  0.20567547  0.09747754
##   0.50505051  0.1259778  0.20597735  0.09744764
##   0.51515152  0.1259409  0.20626145  0.09742004
##   0.52525253  0.1259067  0.20651668  0.09739453
##   0.53535354  0.1258739  0.20676453  0.09736963
##   0.54545455  0.1258437  0.20698811  0.09734732
##   0.55555556  0.1258159  0.20718672  0.09732584
##   0.56565657  0.1257890  0.20738323  0.09730370
##   0.57575758  0.1257630  0.20757568  0.09728164
##   0.58585859  0.1257376  0.20776859  0.09725977
##   0.59595960  0.1257128  0.20796257  0.09723711
##   0.60606061  0.1256890  0.20815266  0.09721335
##   0.61616162  0.1256663  0.20833647  0.09719058
##   0.62626263  0.1256464  0.20849136  0.09717154
##   0.63636364  0.1256280  0.20863479  0.09715516
##   0.64646465  0.1256101  0.20878348  0.09713951
##   0.65656566  0.1255937  0.20891792  0.09712466
##   0.66666667  0.1255792  0.20903685  0.09711118
##   0.67676768  0.1255655  0.20915252  0.09709860
##   0.68686869  0.1255535  0.20925350  0.09708776
##   0.69696970  0.1255440  0.20933102  0.09707866
##   0.70707071  0.1255367  0.20938725  0.09707057
##   0.71717172  0.1255322  0.20941496  0.09706376
##   0.72727273  0.1255315  0.20940298  0.09705871
##   0.73737374  0.1255332  0.20937069  0.09705461
##   0.74747475  0.1255364  0.20932651  0.09705262
##   0.75757576  0.1255426  0.20925415  0.09705270
##   0.76767677  0.1255520  0.20914958  0.09705575
##   0.77777778  0.1255636  0.20902710  0.09706000
##   0.78787879  0.1255774  0.20888612  0.09706624
##   0.79797980  0.1255932  0.20872953  0.09707492
##   0.80808081  0.1256106  0.20856140  0.09708471
##   0.81818182  0.1256296  0.20838106  0.09709555
##   0.82828283  0.1256505  0.20818702  0.09710848
##   0.83838384  0.1256740  0.20797185  0.09712385
##   0.84848485  0.1256984  0.20775283  0.09714107
##   0.85858586  0.1257239  0.20752938  0.09715873
##   0.86868687  0.1257506  0.20730056  0.09717707
##   0.87878788  0.1257787  0.20706336  0.09719647
##   0.88888889  0.1258077  0.20682243  0.09721685
##   0.89898990  0.1258381  0.20657332  0.09723906
##   0.90909091  0.1258704  0.20631055  0.09726306
##   0.91919192  0.1259032  0.20604827  0.09728710
##   0.92929293  0.1259371  0.20578231  0.09731117
##   0.93939394  0.1259726  0.20550522  0.09733714
##   0.94949495  0.1260093  0.20522318  0.09736487
##   0.95959596  0.1260464  0.20494349  0.09739350
##   0.96969697  0.1260854  0.20464862  0.09742430
##   0.97979798  0.1261263  0.20434020  0.09745741
##   0.98989899  0.1261680  0.20403005  0.09749160
##   1.00000000  0.1262107  0.20371699  0.09752633
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was fraction = 0.7272727.

##     fraction
## 73 0.7272727
## Warning: Removed 1 rows containing missing values (geom_point).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients"
##           PC1           PC2           PC3           PC4           PC5           PC6           PC7           PC8 
## -1.807777e-02 -3.974089e-02 -1.641502e-02 -1.259014e-02  7.354240e-03 -3.806200e-03 -6.818266e-03 -9.759112e-04 
##           PC9          PC11          PC12          PC13          PC14          PC16          PC17          PC18 
## -7.576262e-04 -1.836135e-02 -1.436554e-02  8.861464e-03  6.884288e-03  9.887667e-03 -5.061638e-03 -9.755651e-03 
##          PC20          PC21          PC22          PC23          PC24          PC25          PC26          PC27 
##  1.040368e-02  5.814544e-04  1.073440e-03  3.303890e-03 -8.262577e-03  1.657465e-03  2.566088e-03  2.910117e-03 
##          PC28          PC29          PC31          PC32          PC33          PC34          PC37          PC38 
##  4.753624e-04  2.377255e-03 -9.977010e-04 -4.454384e-03  1.402403e-03  6.408199e-03 -1.288745e-03  7.947756e-04 
##          PC44          PC45          PC47          PC48          PC49          PC51          PC57          PC58 
##  1.442167e-03 -8.847126e-04 -1.254266e-03  7.330733e-04  5.615982e-04  6.949503e-05 -1.219430e-03  1.360983e-04 
##          PC59          PC60          PC63          PC64          PC65          PC66          PC68          PC69 
##  2.285265e-03 -6.316474e-04 -1.915560e-03 -1.520356e-03 -4.537592e-04 -1.939916e-03  2.430650e-03  8.953422e-04 
##          PC71          PC73          PC74          PC75          PC77          PC78          PC79          PC80 
##  2.707261e-03  1.130220e-04 -1.028108e-03 -3.182552e-03  1.130317e-03  7.781756e-04  1.987858e-03 -3.790135e-04 
##          PC81          PC83          PC84          PC85          PC86          PC87          PC88          PC89 
##  3.634402e-03 -2.179009e-03  3.190434e-03  4.465104e-03 -1.511251e-03  8.388655e-03 -1.153598e-03 -1.959136e-03 
##          PC90          PC94          PC96          PC97          PC98          PC99         PC102         PC103 
## -1.266890e-03 -3.393798e-03 -3.316230e-03 -1.039651e-03 -1.656034e-04 -1.504194e-03 -1.649463e-03  2.148357e-03 
##         PC104         PC105         PC106         PC107         PC109         PC111         PC113         PC114 
## -3.214853e-03  2.446588e-03  3.126311e-03  3.846943e-04  1.349347e-03 -2.594311e-03  7.110745e-04 -2.126160e-03 
##         PC115         PC118         PC119         PC120         PC122         PC123         PC124         PC125 
## -5.522405e-03  1.627782e-03 -2.180681e-03  2.113176e-04  1.802282e-03 -1.590026e-03  8.440231e-04  1.228383e-03 
##         PC127         PC128         PC131         PC132         PC133         PC134         PC135         PC136 
##  1.578087e-03 -1.165117e-03 -3.356972e-03  1.895286e-04 -6.182413e-04  4.022129e-03  2.248923e-03  3.351567e-04 
##         PC138         PC139         PC140         PC143         PC144         PC145         PC146         PC147 
##  9.799095e-04 -2.740022e-03 -3.325096e-04  1.053795e-03  1.676228e-03  9.755513e-04  3.976309e-03 -5.701974e-05 
##         PC148         PC151         PC153         PC154         PC155         PC156         PC159         PC161 
## -1.188738e-03  2.150972e-03  2.444114e-03 -2.772852e-03  2.512345e-03  2.643782e-03  5.154754e-03  5.587627e-04 
##         PC162         PC163 
## -4.459427e-03  1.686526e-03

Test

# Evaluate the caret-trained LARS model on the held-out test set.
# Runs only when LARS was enabled in the report parameters.
if (algo.LARS.caret == TRUE){
  test.model(
    model.LARS.caret, data.test,
    formula = formula,
    method = 'lars', subopt = NULL,
    feature.names = feature.names,
    label.names = label.names,
    draw.limits = TRUE,
    # NOTE(review): `transformation = t` — presumably a transformation object
    # defined earlier in the report; if it is not, this resolves to base::t.
    # Confirm against the setup chunks.
    transformation = t
  )
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.699   4.966   5.012   5.001   5.046   5.125 
## [1] "lars  Test MSE: 0.0137364263489851"
## [1] "lars  Test RMSE: 0.117202501462149"
## [1] "lars  Test MSE (Org Scale): 81.5488137763212"
## [1] "lars  Test RMSE (Org Scale): 9.03043818296328"

Session Info

# Record the full R session state (R version, platform, locale, attached
# packages and their versions) so this report's results are reproducible.
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17134)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252    LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                           LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] bindrcpp_0.2.2             knitr_1.20                 htmltools_0.3.6            reshape2_1.4.3            
##  [5] lars_1.2                   doParallel_1.0.14          iterators_1.0.10           caret_6.0-81              
##  [9] leaps_3.0                  ggforce_0.1.3              rlist_0.4.6.1              car_3.0-2                 
## [13] carData_3.0-2              bestNormalize_1.3.0        scales_1.0.0               onewaytests_2.0           
## [17] caTools_1.17.1.1           mosaic_1.5.0               mosaicData_0.17.0          ggformula_0.9.1           
## [21] ggstance_0.3.1             lattice_0.20-35            DT_0.5                     ggiraph_0.6.0             
## [25] investr_1.4.0              glmnet_2.0-16              foreach_1.4.4              Matrix_1.2-14             
## [29] MASS_7.3-50                PerformanceAnalytics_1.5.2 xts_0.11-2                 zoo_1.8-4                 
## [33] forcats_0.3.0              stringr_1.3.1              dplyr_0.7.8                purrr_0.2.5               
## [37] readr_1.3.1                tidyr_0.8.2                tibble_1.4.2               ggplot2_3.1.0             
## [41] tidyverse_1.2.1            usdm_1.1-18                raster_2.8-4               sp_1.3-1                  
## [45] pacman_0.5.0              
## 
## loaded via a namespace (and not attached):
##  [1] readxl_1.2.0       backports_1.1.3    plyr_1.8.4         lazyeval_0.2.1     splines_3.5.1      mycor_0.1.1       
##  [7] crosstalk_1.0.0    leaflet_2.0.2      digest_0.6.18      magrittr_1.5       mosaicCore_0.6.0   openxlsx_4.1.0    
## [13] recipes_0.1.4      modelr_0.1.2       gower_0.1.2        colorspace_1.3-2   rvest_0.3.2        ggrepel_0.8.0     
## [19] haven_2.0.0        crayon_1.3.4       jsonlite_1.5       bindr_0.1.1        survival_2.42-3    glue_1.3.0        
## [25] registry_0.5       gtable_0.2.0       ppcor_1.1          ipred_0.9-8        abind_1.4-5        rngtools_1.3.1    
## [31] bibtex_0.4.2       Rcpp_1.0.0         xtable_1.8-3       units_0.6-2        foreign_0.8-70     stats4_3.5.1      
## [37] lava_1.6.4         prodlim_2018.04.18 htmlwidgets_1.3    httr_1.4.0         RColorBrewer_1.1-2 pkgconfig_2.0.2   
## [43] farver_1.1.0       nnet_7.3-12        labeling_0.3       tidyselect_0.2.5   rlang_0.3.1        later_0.7.5       
## [49] munsell_0.5.0      cellranger_1.1.0   tools_3.5.1        cli_1.0.1          generics_0.0.2     moments_0.14      
## [55] sjlabelled_1.0.17  broom_0.5.1        evaluate_0.12      ggdendro_0.1-20    yaml_2.2.0         ModelMetrics_1.2.2
## [61] zip_2.0.1          nlme_3.1-137       doRNG_1.7.1        mime_0.6           xml2_1.2.0         compiler_3.5.1    
## [67] rstudioapi_0.8     curl_3.2           tweenr_1.0.1       stringi_1.2.4      gdtools_0.1.7      pillar_1.3.1      
## [73] data.table_1.11.8  bitops_1.0-6       insight_0.1.2      httpuv_1.4.5       R6_2.3.0           promises_1.0.1    
## [79] gridExtra_2.3      rio_0.5.16         codetools_0.2-15   assertthat_0.2.0   pkgmaker_0.27      withr_2.1.2       
## [85] nortest_1.0-4      mgcv_1.8-24        hms_0.4.2          quadprog_1.5-5     grid_3.5.1         rpart_4.1-13      
## [91] timeDate_3043.102  class_7.3-14       rmarkdown_1.11     shiny_1.2.0        lubridate_1.7.4